-rw-r--r--.mailmap3
-rw-r--r--CREDITS8
-rw-r--r--Documentation/ABI/stable/sysfs-block53
-rw-r--r--Documentation/admin-guide/cifs/usage.rst36
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt25
-rw-r--r--Documentation/arch/riscv/cmodx.rst4
-rw-r--r--Documentation/block/data-integrity.rst49
-rw-r--r--Documentation/block/writeback_cache_control.rst67
-rw-r--r--Documentation/devicetree/bindings/cache/qcom,llcc.yaml2
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,edma.yaml4
-rw-r--r--Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml3
-rw-r--r--Documentation/driver-api/cxl/memory-devices.rst15
-rw-r--r--Documentation/filesystems/index.rst1
-rw-r--r--Documentation/filesystems/iomap/design.rst441
-rw-r--r--Documentation/filesystems/iomap/index.rst13
-rw-r--r--Documentation/filesystems/iomap/operations.rst713
-rw-r--r--Documentation/filesystems/iomap/porting.rst120
-rw-r--r--Documentation/filesystems/mount_api.rst9
-rw-r--r--Documentation/filesystems/proc.rst1
-rw-r--r--Documentation/i2c/i2c_bus.svg15
-rw-r--r--Documentation/i2c/summary.rst79
-rw-r--r--Documentation/kbuild/modules.rst8
-rw-r--r--Documentation/netlink/specs/ethtool.yaml7
-rw-r--r--Documentation/netlink/specs/nfsd.yaml2
-rw-r--r--Documentation/networking/devlink/devlink-region.rst2
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/userspace-api/mfd_noexec.rst86
-rw-r--r--Documentation/virt/hyperv/clocks.rst21
-rw-r--r--Documentation/virt/hyperv/overview.rst22
-rw-r--r--Documentation/virt/hyperv/vmbus.rst143
-rw-r--r--MAINTAINERS120
-rw-r--r--Makefile2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso6
-rw-r--r--arch/arm/boot/dts/rockchip/rk3066a.dtsi1
-rw-r--r--arch/arm/include/asm/efi.h13
-rw-r--r--arch/arm/mach-davinci/pm.c2
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-mek.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts1
-rw-r--r--arch/arm64/boot/dts/qcom/qdu1000.dtsi16
-rw-r--r--arch/arm64/boot/dts/qcom/sa8775p.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc8180x.dtsi11
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-crd.dts3
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts15
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sm6115.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-crd.dts13
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-qcp.dts9
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100.dtsi10
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3368.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi5
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts2
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/include/asm/asm-extable.h3
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h1
-rw-r--r--arch/arm64/include/asm/runtime-const.h88
-rw-r--r--arch/arm64/include/asm/uaccess.h169
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h11
-rw-r--r--arch/arm64/kernel/efi.c2
-rw-r--r--arch/arm64/kernel/mte.c12
-rw-r--r--arch/arm64/kernel/pi/map_kernel.c2
-rw-r--r--arch/arm64/kernel/syscall.c16
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c12
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c15
-rw-r--r--arch/arm64/kvm/vgic/vgic.h2
-rw-r--r--arch/arm64/mm/mmu.c3
-rw-r--r--arch/csky/include/uapi/asm/unistd.h1
-rw-r--r--arch/csky/kernel/syscall.c2
-rw-r--r--arch/hexagon/include/asm/syscalls.h6
-rw-r--r--arch/hexagon/include/uapi/asm/unistd.h1
-rw-r--r--arch/hexagon/kernel/syscalltab.c7
-rw-r--r--arch/loongarch/Kconfig5
-rw-r--r--arch/loongarch/Kconfig.debug1
-rw-r--r--arch/loongarch/include/asm/hw_breakpoint.h4
-rw-r--r--arch/loongarch/kernel/hw_breakpoint.c96
-rw-r--r--arch/loongarch/kernel/ptrace.c47
-rw-r--r--arch/loongarch/kernel/syscall.c2
-rw-r--r--arch/loongarch/kvm/exit.c2
-rw-r--r--arch/m68k/emu/nfblock.c3
-rw-r--r--arch/microblaze/kernel/sys_microblaze.c2
-rw-r--r--arch/mips/bmips/setup.c3
-rw-r--r--arch/mips/include/asm/mipsmtregs.h2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl4
-rw-r--r--arch/mips/pci/ops-rc32434.c4
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/kernel/sys_parisc32.c9
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/powerpc/crypto/.gitignore2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c7
-rw-r--r--arch/powerpc/kernel/head_64.S5
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/powerpc/kexec/core_64.c11
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c18
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c8
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c5
-rw-r--r--arch/riscv/boot/dts/canaan/canaan_kd233.dts7
-rw-r--r--arch/riscv/boot/dts/canaan/k210.dtsi23
-rw-r--r--arch/riscv/boot/dts/canaan/k210_generic.dts5
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts9
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts7
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts9
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts10
-rw-r--r--arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts1
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-common.dtsi2
-rw-r--r--arch/riscv/include/asm/insn.h2
-rw-r--r--arch/riscv/kernel/ftrace.c7
-rw-r--r--arch/riscv/kernel/machine_kexec.c10
-rw-r--r--arch/riscv/kernel/patch.c26
-rw-r--r--arch/riscv/kernel/stacktrace.c5
-rw-r--r--arch/riscv/kernel/sys_riscv.c4
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c2
-rw-r--r--arch/s390/boot/startup.c11
-rw-r--r--arch/s390/configs/debug_defconfig5
-rw-r--r--arch/s390/configs/defconfig5
-rw-r--r--arch/s390/include/asm/entry-common.h2
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/kernel/syscall.c27
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/kvm-s390.h15
-rw-r--r--arch/s390/kvm/priv.c32
-rw-r--r--arch/s390/mm/pgalloc.c4
-rw-r--r--arch/s390/pci/pci_irq.c2
-rw-r--r--arch/sh/kernel/sys_sh32.c11
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/sparc/kernel/sys32.S221
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl8
-rw-r--r--arch/um/drivers/ubd_kern.c53
-rw-r--r--arch/x86/entry/entry_64_compat.S14
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h12
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/entry-common.h15
-rw-r--r--arch/x86/include/asm/runtime-const.h61
-rw-r--r--arch/x86/include/asm/word-at-a-time.h57
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c3
-rw-r--r--arch/x86/kernel/time.c20
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/kvm/svm/svm.c4
-rw-r--r--arch/x86/kvm/x86.c9
-rw-r--r--arch/x86/platform/efi/memmap.c12
-rw-r--r--arch/xtensa/include/asm/current.h2
-rw-r--r--arch/xtensa/include/asm/thread_info.h2
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c5
-rw-r--r--block/Kconfig8
-rw-r--r--block/Makefile3
-rw-r--r--block/bdev.c41
-rw-r--r--block/bfq-cgroup.c51
-rw-r--r--block/bfq-iosched.c38
-rw-r--r--block/bfq-iosched.h3
-rw-r--r--block/bio-integrity.c135
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-cgroup.h13
-rw-r--r--block/blk-core.c45
-rw-r--r--block/blk-flush.c36
-rw-r--r--block/blk-integrity.c228
-rw-r--r--block/blk-lib.c210
-rw-r--r--block/blk-map.c2
-rw-r--r--block/blk-merge.c92
-rw-r--r--block/blk-mq-debugfs.c13
-rw-r--r--block/blk-mq.c89
-rw-r--r--block/blk-settings.c549
-rw-r--r--block/blk-sysfs.c434
-rw-r--r--block/blk-throttle.c3
-rw-r--r--block/blk-wbt.c22
-rw-r--r--block/blk-zoned.c126
-rw-r--r--block/blk.h22
-rw-r--r--block/elevator.c9
-rw-r--r--block/elevator.h4
-rw-r--r--block/fops.c25
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioctl.c2
-rw-r--r--block/mq-deadline.c20
-rw-r--r--block/t10-pi.c302
-rw-r--r--drivers/acpi/acpica/exregion.c23
-rw-r--r--drivers/acpi/internal.h4
-rw-r--r--drivers/acpi/mipi-disco-img.c28
-rw-r--r--drivers/acpi/processor_idle.c37
-rw-r--r--drivers/ata/ahci.c25
-rw-r--r--drivers/ata/libata-core.c32
-rw-r--r--drivers/ata/libata-scsi.c3
-rw-r--r--drivers/ata/pata_macio.c4
-rw-r--r--drivers/block/Kconfig9
-rw-r--r--drivers/block/Makefile3
-rw-r--r--drivers/block/amiflop.c6
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/ataflop.c6
-rw-r--r--drivers/block/brd.c7
-rw-r--r--drivers/block/drbd/drbd_main.c6
-rw-r--r--drivers/block/floppy.c4
-rw-r--r--drivers/block/loop.c184
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/n64cart.c2
-rw-r--r--drivers/block/nbd.c26
-rw-r--r--drivers/block/null_blk/main.c29
-rw-r--r--drivers/block/null_blk/null_blk.h1
-rw-r--r--drivers/block/null_blk/zoned.c15
-rw-r--r--drivers/block/pktcdvd.c1
-rw-r--r--drivers/block/ps3disk.c8
-rw-r--r--drivers/block/rbd.c15
-rw-r--r--drivers/block/rnbd/rnbd-clt-sysfs.c2
-rw-r--r--drivers/block/rnbd/rnbd-clt.c16
-rw-r--r--drivers/block/rnbd/rnbd-srv-sysfs.c4
-rw-r--r--drivers/block/rnull.rs73
-rw-r--r--drivers/block/sunvdc.c1
-rw-r--r--drivers/block/swim.c5
-rw-r--r--drivers/block/swim3.c5
-rw-r--r--drivers/block/ublk_drv.c22
-rw-r--r--drivers/block/virtio_blk.c68
-rw-r--r--drivers/block/xen-blkback/blkback.c1
-rw-r--r--drivers/block/xen-blkfront.c73
-rw-r--r--drivers/block/z2ram.c1
-rw-r--r--drivers/block/zram/zram_drv.c6
-rw-r--r--drivers/bluetooth/btintel_pcie.c2
-rw-r--r--drivers/bluetooth/btnxpuart.c2
-rw-r--r--drivers/bluetooth/hci_bcm4377.c10
-rw-r--r--drivers/bluetooth/hci_qca.c18
-rw-r--r--drivers/cdrom/cdrom.c1
-rw-r--r--drivers/cdrom/gdrom.c1
-rw-r--r--drivers/char/hpet.c34
-rw-r--r--drivers/char/tpm/Makefile2
-rw-r--r--drivers/char/tpm/tpm2-sessions.c419
-rw-r--r--drivers/clk/mediatek/clk-mt8183-mfgcfg.c1
-rw-r--r--drivers/clk/mediatek/clk-mtk.c24
-rw-r--r--drivers/clk/mediatek/clk-mtk.h2
-rw-r--r--drivers/clk/qcom/apss-ipq-pll.c2
-rw-r--r--drivers/clk/qcom/clk-alpha-pll.c3
-rw-r--r--drivers/clk/qcom/gcc-ipq9574.c10
-rw-r--r--drivers/clk/qcom/gcc-sm6350.c10
-rw-r--r--drivers/clk/sunxi-ng/ccu_common.c18
-rw-r--r--drivers/counter/ti-eqep.c6
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c4
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/cpufreq/intel_pstate.c13
-rw-r--r--drivers/crypto/intel/qat/qat_common/Makefile5
-rw-r--r--drivers/cxl/core/hdm.c13
-rw-r--r--drivers/cxl/core/pmem.c16
-rw-r--r--drivers/cxl/core/region.c103
-rw-r--r--drivers/cxl/cxl.h6
-rw-r--r--drivers/cxl/cxlmem.h21
-rw-r--r--drivers/cxl/mem.c17
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/dma/idxd/irq.c4
-rw-r--r--drivers/dma/ioat/init.c55
-rw-r--r--drivers/dma/ti/k3-udma-glue.c5
-rw-r--r--drivers/dma/xilinx/xdma.c4
-rw-r--r--drivers/firmware/cirrus/cs_dsp.c227
-rw-r--r--drivers/firmware/efi/memmap.c9
-rw-r--r--drivers/firmware/psci/psci.c4
-rw-r--r--drivers/firmware/sysfb.c12
-rw-r--r--drivers/gpio/gpio-davinci.c5
-rw-r--r--drivers/gpio/gpio-graniterapids.c2
-rw-r--r--drivers/gpio/gpio-mmio.c2
-rw-r--r--drivers/gpio/gpio-pca953x.c2
-rw-r--r--drivers/gpio/gpiolib-cdev.c28
-rw-r--r--drivers/gpio/gpiolib-of.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.c5
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c65
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c72
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c10
-rw-r--r--drivers/gpu/drm/amd/display/include/dpcd_defs.h5
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h2
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c13
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c73
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511.h2
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_cec.c13
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_drv.c22
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c6
-rw-r--r--drivers/gpu/drm/drm_fbdev_dma.c5
-rw-r--r--drivers/gpu/drm/drm_fbdev_generic.c3
-rw-r--r--drivers/gpu/drm/drm_file.c8
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c9
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_lvds.c3
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_lvds.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c1
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c37
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv17.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c3
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c1
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c6
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c44
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c2
-rw-r--r--drivers/gpu/drm/radeon/sumo_dpm.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig2
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c12
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c47
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c6
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c4
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c8
-rw-r--r--drivers/hv/hv.c37
-rw-r--r--drivers/hv/hv_balloon.c190
-rw-r--r--drivers/i2c/Kconfig2
-rw-r--r--drivers/i2c/busses/Makefile6
-rw-r--r--drivers/i2c/busses/i2c-ocores.c2
-rw-r--r--drivers/i2c/busses/i2c-pnx.c48
-rw-r--r--drivers/i2c/busses/i2c-rcar.c27
-rw-r--r--drivers/i2c/busses/i2c-viai2c-common.c71
-rw-r--r--drivers/i2c/busses/i2c-viai2c-common.h2
-rw-r--r--drivers/i2c/busses/i2c-viai2c-wmt.c36
-rw-r--r--drivers/i2c/busses/i2c-viai2c-zhaoxin.c113
-rw-r--r--drivers/i2c/i2c-core-base.c1
-rw-r--r--drivers/i2c/i2c-slave-testunit.c12
-rw-r--r--drivers/iio/accel/Kconfig2
-rw-r--r--drivers/iio/adc/ad7266.c2
-rw-r--r--drivers/iio/adc/xilinx-ams.c8
-rw-r--r--drivers/iio/chemical/bme680.h2
-rw-r--r--drivers/iio/chemical/bme680_core.c62
-rw-r--r--drivers/iio/dac/Kconfig2
-rw-r--r--drivers/iio/humidity/hdc3020.c325
-rw-r--r--drivers/iio/industrialio-trigger.c2
-rw-r--r--drivers/iio/light/apds9306.c4
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h4
-rw-r--r--drivers/infiniband/hw/mana/mr.c1
-rw-r--r--drivers/infiniband/hw/mlx5/main.c4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c8
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c2
-rw-r--r--drivers/input/joystick/xpad.c1
-rw-r--r--drivers/input/mouse/elantech.c31
-rw-r--r--drivers/input/serio/i8042-acpipnpio.h18
-rw-r--r--drivers/input/touchscreen/ads7846.c12
-rw-r--r--drivers/input/touchscreen/ili210x.c4
-rw-r--r--drivers/iommu/amd/init.c1
-rw-r--r--drivers/iommu/amd/iommu.c12
-rw-r--r--drivers/iommu/intel/iommu.c20
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c5
-rw-r--r--drivers/irqchip/irq-loongson-liointc.c4
-rw-r--r--drivers/md/bcache/super.c13
-rw-r--r--drivers/md/dm-cache-target.c1
-rw-r--r--drivers/md/dm-clone-target.c1
-rw-r--r--drivers/md/dm-core.h1
-rw-r--r--drivers/md/dm-crypt.c4
-rw-r--r--drivers/md/dm-integrity.c47
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-table.c351
-rw-r--r--drivers/md/dm-vdo/dm-vdo-target.c2
-rw-r--r--drivers/md/dm-zone.c254
-rw-r--r--drivers/md/dm-zoned-target.c2
-rw-r--r--drivers/md/dm.c174
-rw-r--r--drivers/md/dm.h4
-rw-r--r--drivers/md/md-bitmap.c6
-rw-r--r--drivers/md/md-cluster.c2
-rw-r--r--drivers/md/md.c630
-rw-r--r--drivers/md/md.h136
-rw-r--r--drivers/md/raid0.c30
-rw-r--r--drivers/md/raid1.c34
-rw-r--r--drivers/md/raid10.c23
-rw-r--r--drivers/md/raid5.c114
-rw-r--r--drivers/media/pci/intel/ipu6/ipu6-isys-video.c2
-rw-r--r--drivers/media/pci/intel/ipu6/ipu6-isys.c2
-rw-r--r--drivers/media/pci/intel/ivsc/Kconfig1
-rw-r--r--drivers/mfd/axp20x.c1
-rw-r--r--drivers/misc/fastrpc.c41
-rw-r--r--drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c4
-rw-r--r--drivers/misc/mei/platform-vsc.c4
-rw-r--r--drivers/misc/mei/vsc-fw-loader.c2
-rw-r--r--drivers/misc/mei/vsc-tp.c18
-rw-r--r--drivers/mmc/core/block.c42
-rw-r--r--drivers/mmc/core/queue.c20
-rw-r--r--drivers/mmc/core/queue.h3
-rw-r--r--drivers/mmc/host/davinci_mmc.c3
-rw-r--r--drivers/mmc/host/moxart-mmc.c78
-rw-r--r--drivers/mmc/host/sdhci-brcmstb.c4
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c11
-rw-r--r--drivers/mmc/host/sdhci-pci-o2micro.c41
-rw-r--r--drivers/mmc/host/sdhci.c40
-rw-r--r--drivers/mtd/mtd_blkdevs.c9
-rw-r--r--drivers/mtd/nand/raw/nand_base.c66
-rw-r--r--drivers/mtd/nand/raw/rockchip-nand-controller.c6
-rw-r--r--drivers/net/bonding/bond_main.c3
-rw-r--r--drivers/net/bonding/bond_options.c6
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c14
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c55
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd.h5
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c3
-rw-r--r--drivers/net/dsa/lan9303-core.c23
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c61
-rw-r--r--drivers/net/dsa/microchip/ksz9477.h2
-rw-r--r--drivers/net/dsa/microchip/ksz9477_reg.h11
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c13
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h1
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c32
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h311
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c18
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c73
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c132
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ddp.c23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hwmon.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c20
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c131
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c6
-rw-r--r--drivers/net/ethernet/lantiq_etop.c4
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc.h8
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c23
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c12
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/Makefile3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c10
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h55
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_linecards.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c20
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c10
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c4
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ethtool.c44
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c48
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h28
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c55
-rw-r--r--drivers/net/ethernet/qualcomm/qca_debug.c6
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c16
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.h3
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c40
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c1
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c10
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h1
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c2
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c124
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h2
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c9
-rw-r--r--drivers/net/ntb_netdev.c2
-rw-r--r--drivers/net/phy/aquantia/aquantia.h5
-rw-r--r--drivers/net/phy/dp83tg720.c38
-rw-r--r--drivers/net/phy/micrel.c1
-rw-r--r--drivers/net/phy/microchip_t1.c2
-rw-r--r--drivers/net/phy/mxl-gpy.c58
-rw-r--r--drivers/net/ppp/ppp_generic.c15
-rw-r--r--drivers/net/pse-pd/Kconfig1
-rw-r--r--drivers/net/usb/ax88179_178a.c24
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/usb/rtl8150.c3
-rw-r--r--drivers/net/virtio_net.c32
-rw-r--r--drivers/net/vxlan/vxlan_core.c9
-rw-r--r--drivers/net/wireguard/allowedips.c4
-rw-r--r--drivers/net/wireguard/queueing.h4
-rw-r--r--drivers/net/wireguard/send.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c2
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.c3
-rw-r--r--drivers/net/wireless/ti/wlcore/cmd.c7
-rw-r--r--drivers/net/wireless/ti/wlcore/main.c17
-rw-r--r--drivers/net/wireless/ti/wlcore/tx.c7
-rw-r--r--drivers/net/wireless/ti/wlcore/wlcore_i.h6
-rw-r--r--drivers/nvdimm/btt.c17
-rw-r--r--drivers/nvdimm/pmem.c14
-rw-r--r--drivers/nvme/host/Kconfig1
-rw-r--r--drivers/nvme/host/apple.c33
-rw-r--r--drivers/nvme/host/constants.c2
-rw-r--r--drivers/nvme/host/core.c286
-rw-r--r--drivers/nvme/host/fabrics.c25
-rw-r--r--drivers/nvme/host/fabrics.h1
-rw-r--r--drivers/nvme/host/fault_inject.c2
-rw-r--r--drivers/nvme/host/fc.c55
-rw-r--r--drivers/nvme/host/multipath.c144
-rw-r--r--drivers/nvme/host/nvme.h30
-rw-r--r--drivers/nvme/host/pci.c47
-rw-r--r--drivers/nvme/host/pr.c10
-rw-r--r--drivers/nvme/host/rdma.c34
-rw-r--r--drivers/nvme/host/tcp.c31
-rw-r--r--drivers/nvme/host/zns.c3
-rw-r--r--drivers/nvme/target/Kconfig10
-rw-r--r--drivers/nvme/target/Makefile1
-rw-r--r--drivers/nvme/target/admin-cmd.c24
-rw-r--r--drivers/nvme/target/auth.c14
-rw-r--r--drivers/nvme/target/configfs.c41
-rw-r--r--drivers/nvme/target/core.c76
-rw-r--r--drivers/nvme/target/debugfs.c202
-rw-r--r--drivers/nvme/target/debugfs.h42
-rw-r--r--drivers/nvme/target/discovery.c14
-rw-r--r--drivers/nvme/target/fabrics-cmd-auth.c16
-rw-r--r--drivers/nvme/target/fabrics-cmd.c36
-rw-r--r--drivers/nvme/target/fc.c35
-rw-r--r--drivers/nvme/target/fcloop.c11
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c28
-rw-r--r--drivers/nvme/target/loop.c5
-rw-r--r--drivers/nvme/target/nvmet.h12
-rw-r--r--drivers/nvme/target/passthru.c10
-rw-r--r--drivers/nvme/target/rdma.c22
-rw-r--r--drivers/nvme/target/tcp.c18
-rw-r--r--drivers/nvme/target/zns.c30
-rw-r--r--drivers/nvmem/core.c7
-rw-r--r--drivers/nvmem/meson-efuse.c14
-rw-r--r--drivers/nvmem/rmem.c5
-rw-r--r--drivers/of/irq.c18
-rw-r--r--drivers/pci/msi/msi.c10
-rw-r--r--drivers/perf/riscv_pmu.c2
-rw-r--r--drivers/perf/riscv_pmu_sbi.c44
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-combo.c189
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h32
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h13
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp.h2
-rw-r--r--drivers/pinctrl/bcm/pinctrl-bcm2835.c2
-rw-r--r--drivers/pinctrl/core.c2
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.c68
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.h1
-rw-r--r--drivers/pinctrl/pinctrl-tps6594.c1
-rw-r--r--drivers/pinctrl/qcom/pinctrl-spmi-gpio.c1
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rzg2l.c4
-rw-r--r--drivers/platform/mellanox/nvsw-sn2201.c5
-rw-r--r--drivers/platform/x86/amilo-rfkill.c1
-rw-r--r--drivers/platform/x86/firmware_attributes_class.c1
-rw-r--r--drivers/platform/x86/ibm_rtl.c1
-rw-r--r--drivers/platform/x86/intel/hid.c1
-rw-r--r--drivers/platform/x86/intel/pmc/pltdrv.c1
-rw-r--r--drivers/platform/x86/intel/rst.c1
-rw-r--r--drivers/platform/x86/intel/smartconnect.c1
-rw-r--r--drivers/platform/x86/intel/vbtn.c1
-rw-r--r--drivers/platform/x86/lg-laptop.c89
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc.c1
-rw-r--r--drivers/platform/x86/toshiba_acpi.c32
-rw-r--r--drivers/platform/x86/uv_sysfs.c1
-rw-r--r--drivers/platform/x86/wireless-hotkey.c3
-rw-r--r--drivers/platform/x86/xo1-rfkill.c1
-rw-r--r--drivers/pmdomain/qcom/rpmhpd.c7
-rw-r--r--drivers/ptp/ptp_sysfs.c3
-rw-r--r--drivers/pwm/pwm-stm32.c23
-rw-r--r--drivers/regulator/axp20x-regulator.c33
-rw-r--r--drivers/regulator/bd71815-regulator.c2
-rw-r--r--drivers/regulator/core.c1
-rw-r--r--drivers/regulator/tps6594-regulator.c12
-rw-r--r--drivers/reset/Kconfig1
-rw-r--r--drivers/reset/hisilicon/hi6220_reset.c1
-rw-r--r--drivers/s390/block/dasd_eckd.c4
-rw-r--r--drivers/s390/block/dasd_fba.c2
-rw-r--r--drivers/s390/block/dasd_genhd.c1
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/s390/block/scm_blk.c5
-rw-r--r--drivers/s390/char/sclp.c1
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c9
-rw-r--r--drivers/s390/virtio/virtio_ccw.c4
-rw-r--r--drivers/scsi/Kconfig1
-rw-r--r--drivers/scsi/iscsi_tcp.c8
-rw-r--r--drivers/scsi/libsas/sas_ata.c6
-rw-r--r--drivers/scsi/libsas/sas_discover.c2
-rw-r--r--drivers/scsi/libsas/sas_internal.h14
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c11
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c6
-rw-r--r--drivers/scsi/scsi_debug.c594
-rw-r--r--drivers/scsi/scsi_lib.c9
-rw-r--r--drivers/scsi/scsi_trace.c22
-rw-r--r--drivers/scsi/sd.c382
-rw-r--r--drivers/scsi/sd.h31
-rw-r--r--drivers/scsi/sd_dif.c45
-rw-r--r--drivers/scsi/sd_zbc.c52
-rw-r--r--drivers/scsi/sr.c42
-rw-r--r--drivers/soc/litex/Kconfig2
-rw-r--r--drivers/soc/litex/litex_soc_ctrl.c4
-rw-r--r--drivers/soc/qcom/pmic_glink.c4
-rw-r--r--drivers/soc/tegra/fuse/fuse-tegra.c4
-rw-r--r--drivers/soundwire/amd_manager.c3
-rw-r--r--drivers/soundwire/intel_auxdevice.c6
-rw-r--r--drivers/soundwire/mipi_disco.c30
-rw-r--r--drivers/spi/spi-axi-spi-engine.c26
-rw-r--r--drivers/spi/spi-cs42l43.c6
-rw-r--r--drivers/spi/spi-davinci.c6
-rw-r--r--drivers/spi/spi-imx.c16
-rw-r--r--drivers/spi/spi-mux.c2
-rw-r--r--drivers/spi/spi-omap2-mcspi.c15
-rw-r--r--drivers/spi/spi-stm32-qspi.c12
-rw-r--r--drivers/spi/spi.c36
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c4
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h5
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c2
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c7
-rw-r--r--drivers/target/target_core_iblock.c49
-rw-r--r--drivers/tee/optee/ffa_abi.c12
-rw-r--r--drivers/thermal/gov_power_allocator.c3
-rw-r--r--drivers/thermal/gov_step_wise.c23
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c3
-rw-r--r--drivers/thermal/mediatek/lvts_thermal.c12
-rw-r--r--drivers/thermal/thermal_core.c42
-rw-r--r--drivers/thermal/thermal_core.h10
-rw-r--r--drivers/tty/mxser.c2
-rw-r--r--drivers/tty/serial/8250/8250_core.c5
-rw-r--r--drivers/tty/serial/8250/8250_omap.c22
-rw-r--r--drivers/tty/serial/8250/8250_pci.c13
-rw-r--r--drivers/tty/serial/bcm63xx_uart.c7
-rw-r--r--drivers/tty/serial/imx.c61
-rw-r--r--drivers/tty/serial/ma35d1_serial.c13
-rw-r--r--drivers/tty/serial/mcf.c2
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c51
-rw-r--r--drivers/tty/serial/serial_base.h30
-rw-r--r--drivers/tty/serial/serial_base_bus.c129
-rw-r--r--drivers/tty/serial/serial_core.c4
-rw-r--r--drivers/ufs/core/ufs-mcq.c11
-rw-r--r--drivers/ufs/core/ufshcd.c13
-rw-r--r--drivers/usb/atm/cxacru.c14
-rw-r--r--drivers/usb/core/config.c18
-rw-r--r--drivers/usb/core/of.c7
-rw-r--r--drivers/usb/core/quirks.c3
-rw-r--r--drivers/usb/dwc3/core.c26
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c8
-rw-r--r--drivers/usb/gadget/configfs.c3
-rw-r--r--drivers/usb/gadget/function/f_printer.c40
-rw-r--r--drivers/usb/gadget/function/u_ether.c4
-rw-r--r--drivers/usb/gadget/udc/aspeed_udc.c4
-rw-r--r--drivers/usb/host/xhci.c16
-rw-r--r--drivers/usb/musb/da8xx.c8
-rw-r--r--drivers/usb/serial/mos7840.c45
-rw-r--r--drivers/usb/serial/option.c38
-rw-r--r--drivers/usb/storage/scsiglue.c6
-rw-r--r--drivers/usb/storage/uas.c7
-rw-r--r--drivers/usb/typec/ucsi/ucsi_acpi.c61
-rw-r--r--drivers/usb/typec/ucsi/ucsi_glink.c5
-rw-r--r--drivers/usb/typec/ucsi/ucsi_stm32g0.c19
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c2
-rw-r--r--drivers/watchdog/Kconfig1
-rw-r--r--drivers/watchdog/menz69_wdt.c1
-rw-r--r--drivers/watchdog/omap_wdt.c1
-rw-r--r--drivers/watchdog/simatic-ipc-wdt.c1
-rw-r--r--drivers/watchdog/ts4800_wdt.c1
-rw-r--r--drivers/watchdog/twl4030_wdt.c1
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/aio.c8
-rw-r--r--fs/attr.c2
-rw-r--r--fs/autofs/init.c1
-rw-r--r--fs/autofs/inode.c16
-rw-r--r--fs/bcachefs/alloc_background.c337
-rw-r--r--fs/bcachefs/alloc_background.h14
-rw-r--r--fs/bcachefs/alloc_foreground.c6
-rw-r--r--fs/bcachefs/backpointers.c70
-rw-r--r--fs/bcachefs/bcachefs.h21
-rw-r--r--fs/bcachefs/bcachefs_format.h13
-rw-r--r--fs/bcachefs/bkey.c7
-rw-r--r--fs/bcachefs/bkey.h7
-rw-r--r--fs/bcachefs/bkey_methods.c6
-rw-r--r--fs/bcachefs/bkey_methods.h3
-rw-r--r--fs/bcachefs/btree_gc.c54
-rw-r--r--fs/bcachefs/btree_iter.c31
-rw-r--r--fs/bcachefs/btree_locking.c10
-rw-r--r--fs/bcachefs/btree_locking.h22
-rw-r--r--fs/bcachefs/btree_types.h17
-rw-r--r--fs/bcachefs/btree_write_buffer.c37
-rw-r--r--fs/bcachefs/btree_write_buffer.h3
-rw-r--r--fs/bcachefs/buckets.c2
-rw-r--r--fs/bcachefs/buckets.h8
-rw-r--r--fs/bcachefs/chardev.c9
-rw-r--r--fs/bcachefs/clock.c7
-rw-r--r--fs/bcachefs/data_update.c44
-rw-r--r--fs/bcachefs/data_update.h5
-rw-r--r--fs/bcachefs/debug.c113
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/error.c19
-rw-r--r--fs/bcachefs/error.h7
-rw-r--r--fs/bcachefs/eytzinger.h6
-rw-r--r--fs/bcachefs/fs-ioctl.c2
-rw-r--r--fs/bcachefs/fs.c30
-rw-r--r--fs/bcachefs/io_misc.c2
-rw-r--r--fs/bcachefs/io_read.c4
-rw-r--r--fs/bcachefs/journal.c26
-rw-r--r--fs/bcachefs/journal.h2
-rw-r--r--fs/bcachefs/journal_io.c32
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c2
-rw-r--r--fs/bcachefs/lru.c39
-rw-r--r--fs/bcachefs/lru.h6
-rw-r--r--fs/bcachefs/move.c25
-rw-r--r--fs/bcachefs/opts.h2
-rw-r--r--fs/bcachefs/recovery.c12
-rw-r--r--fs/bcachefs/sb-downgrade.c2
-rw-r--r--fs/bcachefs/sb-errors.c14
-rw-r--r--fs/bcachefs/sb-errors_format.h560
-rw-r--r--fs/bcachefs/seqmutex.h11
-rw-r--r--fs/bcachefs/snapshot.c12
-rw-r--r--fs/bcachefs/str_hash.h2
-rw-r--r--fs/bcachefs/super-io.c7
-rw-r--r--fs/bcachefs/super.c30
-rw-r--r--fs/bcachefs/util.c25
-rw-r--r--fs/bcachefs/util.h1
-rw-r--r--fs/befs/linuxvfs.c10
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c5
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/binfmt_script.c1
-rw-r--r--fs/btrfs/bio.c4
-rw-r--r--fs/btrfs/block-group.c20
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/extent_map.c123
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/fs.h1
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/qgroup.c14
-rw-r--r--fs/btrfs/ref-verify.c9
-rw-r--r--fs/btrfs/scrub.c24
-rw-r--r--fs/btrfs/space-info.c24
-rw-r--r--fs/btrfs/tree-log.c43
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/cachefiles/cache.c45
-rw-r--r--fs/cachefiles/daemon.c4
-rw-r--r--fs/cachefiles/internal.h3
-rw-r--r--fs/cachefiles/ondemand.c52
-rw-r--r--fs/cachefiles/volume.c1
-rw-r--r--fs/cachefiles/xattr.c5
-rw-r--r--fs/coda/symlink.c10
-rw-r--r--fs/cramfs/inode.c26
-rw-r--r--fs/dcache.c90
-rw-r--r--fs/debugfs/inode.c16
-rw-r--r--fs/efivarfs/super.c12
-rw-r--r--fs/efs/inode.c1
-rw-r--r--fs/efs/symlink.c14
-rw-r--r--fs/erofs/super.c2
-rw-r--r--fs/erofs/zmap.c2
-rw-r--r--fs/erofs/zutil.c8
-rw-r--r--fs/exec.c14
-rw-r--r--fs/exfat/super.c8
-rw-r--r--fs/exportfs/expfs.c9
-rw-r--r--fs/ext4/crypto.c10
-rw-r--r--fs/ext4/ext4.h35
-rw-r--r--fs/ext4/namei.c122
-rw-r--r--fs/ext4/super.c26
-rw-r--r--fs/f2fs/acl.c3
-rw-r--r--fs/f2fs/dir.c105
-rw-r--r--fs/f2fs/f2fs.h16
-rw-r--r--fs/f2fs/file.c6
-rw-r--r--fs/f2fs/namei.c10
-rw-r--r--fs/f2fs/recovery.c9
-rw-r--r--fs/f2fs/super.c8
-rw-r--r--fs/fat/fat.h18
-rw-r--r--fs/fat/fat_test.c1
-rw-r--r--fs/fat/inode.c675
-rw-r--r--fs/fat/namei_msdos.c38
-rw-r--r--fs/fat/namei_vfat.c38
-rw-r--r--fs/fhandle.c178
-rw-r--r--fs/fs_parser.c34
-rw-r--r--fs/fsopen.c7
-rw-r--r--fs/fuse/acl.c4
-rw-r--r--fs/fuse/inode.c24
-rw-r--r--fs/hfs/inode.c3
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/bfind.c15
-rw-r--r--fs/hfsplus/extents.c9
-rw-r--r--fs/hfsplus/hfsplus_fs.h21
-rw-r--r--fs/hfsplus/ioctl.c4
-rw-r--r--fs/hfsplus/xattr.c2
-rw-r--r--fs/hostfs/hostfs_kern.c106
-rw-r--r--fs/hpfs/namei.c15
-rw-r--r--fs/hpfs/super.c1
-rw-r--r--fs/hugetlbfs/inode.c12
-rw-r--r--fs/inode.c109
-rw-r--r--fs/internal.h16
-rw-r--r--fs/iomap/buffered-io.c81
-rw-r--r--fs/isofs/inode.c16
-rw-r--r--fs/isofs/rock.c17
-rw-r--r--fs/jffs2/file.c14
-rw-r--r--fs/libfs.c74
-rw-r--r--fs/locks.c11
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/minix/namei.c3
-rw-r--r--fs/mount.h3
-rw-r--r--fs/mpage.c13
-rw-r--r--fs/namei.c249
-rw-r--r--fs/namespace.c524
-rw-r--r--fs/netfs/buffered_read.c14
-rw-r--r--fs/netfs/buffered_write.c24
-rw-r--r--fs/netfs/direct_read.c2
-rw-r--r--fs/netfs/direct_write.c11
-rw-r--r--fs/netfs/fscache_cache.c4
-rw-r--r--fs/netfs/fscache_cookie.c28
-rw-r--r--fs/netfs/fscache_io.c12
-rw-r--r--fs/netfs/fscache_main.c2
-rw-r--r--fs/netfs/fscache_volume.c18
-rw-r--r--fs/netfs/internal.h44
-rw-r--r--fs/netfs/io.c12
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/netfs/misc.c85
-rw-r--r--fs/netfs/write_collect.c16
-rw-r--r--fs/netfs/write_issue.c38
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/symlink.c10
-rw-r--r--fs/nfs/write.c1
-rw-r--r--fs/nfsd/netlink.c2
-rw-r--r--fs/nfsd/netlink.h3
-rw-r--r--fs/nfsd/nfsctl.c50
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/nfssvc.c1
-rw-r--r--fs/nilfs2/alloc.c19
-rw-r--r--fs/nilfs2/alloc.h4
-rw-r--r--fs/nilfs2/dat.c2
-rw-r--r--fs/nilfs2/dir.c38
-rw-r--r--fs/nilfs2/ifile.c7
-rw-r--r--fs/nilfs2/nilfs.h10
-rw-r--r--fs/nilfs2/the_nilfs.c6
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/nls/mac-celtic.c1
-rw-r--r--fs/nls/mac-centeuro.c1
-rw-r--r--fs/nls/mac-croatian.c1
-rw-r--r--fs/nls/mac-cyrillic.c1
-rw-r--r--fs/nls/mac-gaelic.c1
-rw-r--r--fs/nls/mac-greek.c1
-rw-r--r--fs/nls/mac-iceland.c1
-rw-r--r--fs/nls/mac-inuit.c1
-rw-r--r--fs/nls/mac-roman.c1
-rw-r--r--fs/nls/mac-romanian.c1
-rw-r--r--fs/nls/mac-turkish.c1
-rw-r--r--fs/nls/nls_ascii.c1
-rw-r--r--fs/nls/nls_base.c1
-rw-r--r--fs/nls/nls_cp1250.c1
-rw-r--r--fs/nls/nls_cp1251.c1
-rw-r--r--fs/nls/nls_cp1255.c1
-rw-r--r--fs/nls/nls_cp437.c1
-rw-r--r--fs/nls/nls_cp737.c1
-rw-r--r--fs/nls/nls_cp775.c1
-rw-r--r--fs/nls/nls_cp850.c1
-rw-r--r--fs/nls/nls_cp852.c1
-rw-r--r--fs/nls/nls_cp855.c1
-rw-r--r--fs/nls/nls_cp857.c1
-rw-r--r--fs/nls/nls_cp860.c1
-rw-r--r--fs/nls/nls_cp861.c1
-rw-r--r--fs/nls/nls_cp862.c1
-rw-r--r--fs/nls/nls_cp863.c1
-rw-r--r--fs/nls/nls_cp864.c1
-rw-r--r--fs/nls/nls_cp865.c1
-rw-r--r--fs/nls/nls_cp866.c1
-rw-r--r--fs/nls/nls_cp869.c1
-rw-r--r--fs/nls/nls_cp874.c1
-rw-r--r--fs/nls/nls_cp932.c1
-rw-r--r--fs/nls/nls_cp936.c1
-rw-r--r--fs/nls/nls_cp949.c1
-rw-r--r--fs/nls/nls_cp950.c1
-rw-r--r--fs/nls/nls_euc-jp.c1
-rw-r--r--fs/nls/nls_iso8859-1.c1
-rw-r--r--fs/nls/nls_iso8859-13.c1
-rw-r--r--fs/nls/nls_iso8859-14.c1
-rw-r--r--fs/nls/nls_iso8859-15.c1
-rw-r--r--fs/nls/nls_iso8859-2.c1
-rw-r--r--fs/nls/nls_iso8859-3.c1
-rw-r--r--fs/nls/nls_iso8859-4.c1
-rw-r--r--fs/nls/nls_iso8859-5.c1
-rw-r--r--fs/nls/nls_iso8859-6.c1
-rw-r--r--fs/nls/nls_iso8859-7.c1
-rw-r--r--fs/nls/nls_iso8859-9.c1
-rw-r--r--fs/nls/nls_koi8-r.c1
-rw-r--r--fs/nls/nls_koi8-ru.c1
-rw-r--r--fs/nls/nls_koi8-u.c1
-rw-r--r--fs/nls/nls_ucs2_utils.c1
-rw-r--r--fs/nls/nls_utf8.c1
-rw-r--r--fs/nsfs.c122
-rw-r--r--fs/ntfs3/super.c12
-rw-r--r--fs/ocfs2/aops.c5
-rw-r--r--fs/ocfs2/journal.c209
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/ocfs2.h27
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/open.c43
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/orangefs/inode.c13
-rw-r--r--fs/orangefs/orangefs-bufmap.c4
-rw-r--r--fs/overlayfs/dir.c8
-rw-r--r--fs/overlayfs/export.c6
-rw-r--r--fs/pidfs.c90
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/proc_namespace.c6
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/qnx6/inode.c1
-rw-r--r--fs/quota/dquot.c8
-rw-r--r--fs/read_write.c18
-rw-r--r--fs/readdir.c4
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/romfs/super.c22
-rw-r--r--fs/smb/client/cifsfs.c2
-rw-r--r--fs/smb/client/cifsglob.h7
-rw-r--r--fs/smb/client/cifssmb.c8
-rw-r--r--fs/smb/client/file.c57
-rw-r--r--fs/smb/client/fs_context.c39
-rw-r--r--fs/smb/client/smb2pdu.c19
-rw-r--r--fs/smb/common/smb2pdu.h34
-rw-r--r--fs/smb/server/smb2pdu.c22
-rw-r--r--fs/stat.c206
-rw-r--r--fs/super.c11
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/tracefs/inode.c16
-rw-r--r--fs/ufs/dir.c1
-rw-r--r--fs/userfaultfd.c7
-rw-r--r--fs/vboxsf/file.c18
-rw-r--r--fs/vboxsf/super.c16
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c31
-rw-r--r--fs/xfs/libxfs/xfs_fs.h2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c23
-rw-r--r--fs/xfs/xfs_bmap_util.c30
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_icache.c7
-rw-r--r--fs/xfs/xfs_inode.c47
-rw-r--r--fs/xfs/xfs_iomap.c34
-rw-r--r--fs/xfs/xfs_iops.c15
-rw-r--r--include/asm-generic/Kbuild1
-rw-r--r--include/asm-generic/runtime-const.h15
-rw-r--r--include/asm-generic/syscalls.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h8
-rw-r--r--include/linux/blk-integrity.h87
-rw-r--r--include/linux/blk_types.h8
-rw-r--r--include/linux/blkdev.h348
-rw-r--r--include/linux/bpf_verifier.h2
-rw-r--r--include/linux/btf.h2
-rw-r--r--include/linux/bvec.h14
-rw-r--r--include/linux/cleanup.h13
-rw-r--r--include/linux/closure.h30
-rw-r--r--include/linux/compat.h2
-rw-r--r--include/linux/compiler_types.h23
-rw-r--r--include/linux/dcache.h11
-rw-r--r--include/linux/device-mapper.h7
-rw-r--r--include/linux/exportfs.h2
-rw-r--r--include/linux/file.h20
-rw-r--r--include/linux/filter.h14
-rw-r--r--include/linux/fs.h116
-rw-r--r--include/linux/fs_parser.h6
-rw-r--r--include/linux/fscache-cache.h6
-rw-r--r--include/linux/fsnotify.h8
-rw-r--r--include/linux/ftrace.h6
-rw-r--r--include/linux/i2c.h24
-rw-r--r--include/linux/io_uring_types.h15
-rw-r--r--include/linux/kcov.h2
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/lsm_hook_defs.h2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h6
-rw-r--r--include/linux/mm.h14
-rw-r--r--include/linux/mmzone.h12
-rw-r--r--include/linux/module.h14
-rw-r--r--include/linux/namei.h8
-rw-r--r--include/linux/nsproxy.h13
-rw-r--r--include/linux/numa.h5
-rw-r--r--include/linux/nvme-fc-driver.h4
-rw-r--r--include/linux/nvme.h25
-rw-r--r--include/linux/page-flags.h21
-rw-r--r--include/linux/page_ref.h57
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/path.h9
-rw-r--r--include/linux/pgalloc_tag.h11
-rw-r--r--include/linux/phy.h2
-rw-r--r--include/linux/printk.h3
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/security.h5
-rw-r--r--include/linux/serial_core.h21
-rw-r--r--include/linux/socket.h3
-rw-r--r--include/linux/spi/spi.h9
-rw-r--r--include/linux/stat.h3
-rw-r--r--include/linux/string.h2
-rw-r--r--include/linux/swap.h3
-rw-r--r--include/linux/syscalls.h26
-rw-r--r--include/linux/t10-pi.h22
-rw-r--r--include/linux/tpm.h81
-rw-r--r--include/linux/workqueue.h2
-rw-r--r--include/net/bluetooth/hci.h11
-rw-r--r--include/net/bluetooth/hci_sync.h2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/mac80211.h2
-rw-r--r--include/net/netfilter/nf_tables.h5
-rw-r--r--include/net/netns/netfilter.h3
-rw-r--r--include/net/tcx.h13
-rw-r--r--include/scsi/scsi_devinfo.h4
-rw-r--r--include/scsi/scsi_proto.h1
-rw-r--r--include/sound/dmaengine_pcm.h1
-rw-r--r--include/trace/events/block.h41
-rw-r--r--include/trace/events/btrfs.h18
-rw-r--r--include/trace/events/fscache.h4
-rw-r--r--include/trace/events/qdisc.h2
-rw-r--r--include/trace/events/scsi.h1
-rw-r--r--include/uapi/asm-generic/unistd.h2
-rw-r--r--include/uapi/drm/panthor_drm.h5
-rw-r--r--include/uapi/drm/xe_drm.h8
-rw-r--r--include/uapi/linux/fs.h5
-rw-r--r--include/uapi/linux/io_uring.h2
-rw-r--r--include/uapi/linux/mount.h10
-rw-r--r--include/uapi/linux/nsfs.h10
-rw-r--r--include/uapi/linux/pidfd.h14
-rw-r--r--include/uapi/linux/stat.h10
-rw-r--r--include/uapi/linux/trace_mmap.h2
-rw-r--r--include/uapi/misc/fastrpc.h3
-rw-r--r--init/Kconfig2
-rw-r--r--io_uring/Makefile6
-rw-r--r--io_uring/advise.c16
-rw-r--r--io_uring/eventfd.c160
-rw-r--r--io_uring/eventfd.h8
-rw-r--r--io_uring/io-wq.c29
-rw-r--r--io_uring/io-wq.h2
-rw-r--r--io_uring/io_uring.c154
-rw-r--r--io_uring/io_uring.h9
-rw-r--r--io_uring/msg_ring.c122
-rw-r--r--io_uring/msg_ring.h1
-rw-r--r--io_uring/napi.c2
-rw-r--r--io_uring/net.c104
-rw-r--r--io_uring/net.h6
-rw-r--r--io_uring/opdef.c34
-rw-r--r--io_uring/opdef.h4
-rw-r--r--io_uring/register.c65
-rw-r--r--io_uring/rsrc.c64
-rw-r--r--io_uring/rw.c9
-rw-r--r--io_uring/statx.c3
-rw-r--r--io_uring/xattr.c4
-rw-r--r--ipc/mqueue.c3
-rw-r--r--kernel/auditfilter.c5
-rw-r--r--kernel/bpf/arena.c16
-rw-r--r--kernel/bpf/bpf_local_storage.c4
-rw-r--r--kernel/bpf/core.c7
-rw-r--r--kernel/bpf/helpers.c99
-rw-r--r--kernel/bpf/ringbuf.c31
-rw-r--r--kernel/bpf/verifier.c86
-rw-r--r--kernel/cpu.c11
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c26
-rw-r--r--kernel/gcov/gcc_4_7.c4
-rw-r--r--kernel/kallsyms.c23
-rw-r--r--kernel/kcov.c1
-rw-r--r--kernel/module/kallsyms.c25
-rw-r--r--kernel/pid_namespace.c1
-rw-r--r--kernel/printk/Makefile2
-rw-r--r--kernel/printk/conopt.c146
-rw-r--r--kernel/printk/console_cmdline.h6
-rw-r--r--kernel/printk/printk.c23
-rw-r--r--kernel/sched/core.c7
-rw-r--r--kernel/sched/deadline.c7
-rw-r--r--kernel/sched/fair.c12
-rw-r--r--kernel/sched/psi.c21
-rw-r--r--kernel/sched/sched.h1
-rw-r--r--kernel/sched/stats.h11
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/time/hrtimer.c2
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/ftrace.c13
-rw-r--r--kernel/workqueue.c51
-rw-r--r--lib/Kconfig8
-rw-r--r--lib/Makefile4
-rw-r--r--lib/alloc_tag.c16
-rwxr-xr-xlib/build_OID_registry4
-rw-r--r--lib/closure.c57
-rw-r--r--lib/debugobjects.c21
-rw-r--r--lib/fortify_kunit.c2
-rw-r--r--lib/overflow_kunit.c20
-rw-r--r--lib/string_helpers_kunit.c1
-rw-r--r--lib/string_kunit.c1
-rw-r--r--mm/compaction.c11
-rw-r--r--mm/damon/core.c23
-rw-r--r--mm/debug_vm_pgtable.c31
-rw-r--r--mm/filemap.c20
-rw-r--r--mm/gup.c291
-rw-r--r--mm/huge_memory.c30
-rw-r--r--mm/hugetlb.c70
-rw-r--r--mm/hugetlb_vmemmap.c16
-rw-r--r--mm/internal.h10
-rw-r--r--mm/kasan/common.c2
-rw-r--r--mm/khugepaged.c10
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memcontrol.c14
-rw-r--r--mm/memory.c23
-rw-r--r--mm/migrate.c26
-rw-r--r--mm/mm_init.c43
-rw-r--r--mm/page-writeback.c32
-rw-r--r--mm/page_alloc.c11
-rw-r--r--mm/page_table_check.c11
-rw-r--r--mm/readahead.c8
-rw-r--r--mm/shmem.c38
-rw-r--r--mm/slub.c7
-rw-r--r--mm/util.c4
-rw-r--r--mm/vmalloc.c31
-rw-r--r--mm/workingset.c14
-rw-r--r--net/batman-adv/originator.c27
-rw-r--r--net/batman-adv/translation-table.c47
-rw-r--r--net/bluetooth/hci_conn.c15
-rw-r--r--net/bluetooth/hci_core.c76
-rw-r--r--net/bluetooth/hci_event.c33
-rw-r--r--net/bluetooth/hci_sync.c13
-rw-r--r--net/bluetooth/iso.c3
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/l2cap_sock.c14
-rw-r--r--net/can/j1939/main.c6
-rw-r--r--net/can/j1939/transport.c21
-rw-r--r--net/ceph/crush/mapper.c7
-rw-r--r--net/ceph/mon_client.c14
-rw-r--r--net/core/datagram.c20
-rw-r--r--net/core/dev.c12
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/net_namespace.c9
-rw-r--r--net/core/netdev-genl.c16
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/core/sock.c3
-rw-r--r--net/core/xdp.c4
-rw-r--r--net/dccp/ipv4.c7
-rw-r--r--net/dccp/ipv6.c7
-rw-r--r--net/ethtool/ioctl.c3
-rw-r--r--net/ethtool/linkstate.c41
-rw-r--r--net/ipv4/cipso_ipv4.c75
-rw-r--r--net/ipv4/inet_connection_sock.c17
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/tcp_ao.c6
-rw-r--r--net/ipv4/tcp_input.c64
-rw-r--r--net/ipv4/tcp_metrics.c1
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/seg6_local.c8
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/mac80211/driver-ops.c17
-rw-r--r--net/mac80211/iface.c22
-rw-r--r--net/mac80211/main.c1
-rw-r--r--net/mac80211/scan.c17
-rw-r--r--net/mac80211/util.c4
-rw-r--r--net/mac802154/main.c14
-rw-r--r--net/mac802154/tx.c8
-rw-r--r--net/netfilter/core.c13
-rw-r--r--net/netfilter/ipset/ip_set_core.c11
-rw-r--r--net/netfilter/nf_conntrack_standalone.c15
-rw-r--r--net/netfilter/nf_hooks_lwtunnel.c70
-rw-r--r--net/netfilter/nf_internals.h6
-rw-r--r--net/netfilter/nf_tables_api.c169
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_lookup.c3
-rw-r--r--net/netrom/nr_timer.c3
-rw-r--r--net/openvswitch/conntrack.c7
-rw-r--r--net/sched/act_api.c3
-rw-r--r--net/sched/act_ct.c24
-rw-r--r--net/sched/sch_ingress.c12
-rw-r--r--net/socket.c48
-rw-r--r--net/sunrpc/svc.c5
-rw-r--r--net/sunrpc/xprtsock.c7
-rw-r--r--net/tipc/node.c1
-rw-r--r--net/unix/af_unix.c37
-rw-r--r--net/unix/garbage.c9
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/wireless/scan.c12
-rw-r--r--rust/bindings/bindings_helper.h5
-rw-r--r--rust/helpers.c16
-rw-r--r--rust/kernel/alloc/vec_ext.rs7
-rw-r--r--rust/kernel/block.rs5
-rw-r--r--rust/kernel/block/mq.rs98
-rw-r--r--rust/kernel/block/mq/gen_disk.rs198
-rw-r--r--rust/kernel/block/mq/operations.rs245
-rw-r--r--rust/kernel/block/mq/raw_writer.rs55
-rw-r--r--rust/kernel/block/mq/request.rs253
-rw-r--r--rust/kernel/block/mq/tag_set.rs86
-rw-r--r--rust/kernel/error.rs6
-rw-r--r--rust/kernel/lib.rs2
-rw-r--r--scripts/Makefile.dtbinst2
-rw-r--r--scripts/Makefile.host2
-rw-r--r--scripts/Makefile.package2
-rw-r--r--scripts/gdb/linux/Makefile2
-rwxr-xr-xscripts/ld-version.sh8
-rw-r--r--scripts/package/kernel.spec9
-rw-r--r--security/Kconfig.hardening15
-rw-r--r--security/apparmor/audit.c6
-rw-r--r--security/apparmor/include/audit.h2
-rw-r--r--security/integrity/ima/ima.h2
-rw-r--r--security/integrity/ima/ima_fs.c3
-rw-r--r--security/integrity/ima/ima_policy.c15
-rw-r--r--security/security.c6
-rw-r--r--security/selinux/include/audit.h4
-rw-r--r--security/selinux/ss/services.c5
-rw-r--r--security/smack/smack_lsm.c4
-rw-r--r--security/yama/yama_lsm.c1
-rw-r--r--sound/core/pcm_dmaengine.c22
-rw-r--r--sound/core/pcm_native.c2
-rw-r--r--sound/core/seq/seq_ump_convert.c12
-rw-r--r--sound/hda/intel-dsp-config.c2
-rw-r--r--sound/oss/dmasound/dmasound_core.c1
-rw-r--r--sound/pci/hda/Kconfig2
-rw-r--r--sound/pci/hda/cs35l41_hda.c6
-rw-r--r--sound/pci/hda/cs35l41_hda_property.c8
-rw-r--r--sound/pci/hda/cs35l56_hda.c9
-rw-r--r--sound/pci/hda/patch_realtek.c66
-rw-r--r--sound/pci/hda/tas2781_hda_i2c.c4
-rw-r--r--sound/soc/amd/acp/acp-i2s.c8
-rw-r--r--sound/soc/amd/acp/acp-pci.c12
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c7
-rw-r--r--sound/soc/atmel/atmel-classd.c7
-rw-r--r--sound/soc/codecs/cs35l56-shared.c4
-rw-r--r--sound/soc/codecs/cs42l43-jack.c4
-rw-r--r--sound/soc/codecs/es8326.c8
-rw-r--r--sound/soc/codecs/rt5645.c24
-rw-r--r--sound/soc/codecs/rt5645.h6
-rw-r--r--sound/soc/codecs/rt711-sdw.c2
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.c4
-rw-r--r--sound/soc/fsl/fsl-asoc-card.c3
-rw-r--r--sound/soc/fsl/imx-pcm-dma.c1
-rw-r--r--sound/soc/intel/avs/topology.c19
-rw-r--r--sound/soc/intel/boards/bytcr_rt5640.c11
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-mtl-match.c2
-rw-r--r--sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c10
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-mt6359.c1
-rw-r--r--sound/soc/mxs/mxs-pcm.c1
-rw-r--r--sound/soc/qcom/qdsp6/q6apm-lpass-dais.c32
-rw-r--r--sound/soc/qcom/sdw.c1
-rw-r--r--sound/soc/rockchip/rockchip_i2s_tdm.c13
-rw-r--r--sound/soc/soc-generic-dmaengine-pcm.c8
-rw-r--r--sound/soc/soc-topology.c35
-rw-r--r--sound/soc/sof/intel/hda-dai.c18
-rw-r--r--sound/soc/sof/intel/hda-pcm.c6
-rw-r--r--sound/soc/sof/sof-audio.c2
-rw-r--r--sound/soc/ti/davinci-mcasp.c9
-rw-r--r--sound/soc/ti/omap-hdmi.c6
-rw-r--r--tools/hv/Makefile1
-rw-r--r--tools/perf/util/comm.c29
-rw-r--r--tools/perf/util/dsos.c26
-rw-r--r--tools/power/x86/turbostat/Makefile6
-rw-r--r--tools/power/x86/turbostat/turbostat.c20
-rw-r--r--tools/testing/cxl/test/cxl.c4
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c46
-rw-r--r--tools/testing/selftests/bpf/progs/timer_lockup.c87
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c146
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c41
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/config8
-rw-r--r--tools/testing/selftests/fchmodat2/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h46
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c156
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c364
-rw-r--r--tools/testing/selftests/kselftest_harness.h43
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h1
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c1
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c15
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c1
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_init2_tests.c4
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c38
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c24
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/config3
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c734
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c25
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh46
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c14
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh2
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh335
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh340
-rw-r--r--tools/testing/selftests/openat2/Makefile14
-rw-r--r--tools/testing/selftests/powerpc/flags.mk5
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c32
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c6
-rw-r--r--tools/testing/selftests/timens/exec.c6
-rw-r--r--tools/testing/selftests/timens/timer.c2
-rw-r--r--tools/testing/selftests/timens/timerfd.c2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c4
-rw-r--r--tools/testing/selftests/vDSO/Makefile29
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c16
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c18
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile8
-rw-r--r--virt/kvm/dirty_ring.c3
-rw-r--r--virt/kvm/guest_memfd.c5
-rw-r--r--virt/kvm/kvm_main.c19
1345 files changed, 23266 insertions, 12098 deletions
diff --git a/.mailmap b/.mailmap
index 3f67060fd5e9..29a5fedee49c 100644
--- a/.mailmap
+++ b/.mailmap
@@ -384,6 +384,7 @@ Li Yang <[email protected]> <[email protected]>
Lorenzo Pieralisi <[email protected]> <[email protected]>
@@ -608,6 +609,7 @@ Simon Kelley <[email protected]>
Sricharan Ramabadhran <[email protected]> <[email protected]>
+Stanislav Fomichev <[email protected]> <[email protected]>
Stéphane Witzmann <[email protected]>
Stephen Hemminger <[email protected]> <[email protected]>
@@ -688,6 +690,7 @@ Vivien Didelot <[email protected]> <[email protected]>
diff --git a/CREDITS b/CREDITS
index 0107047f807b..f87c0fa62cfc 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1214,6 +1214,10 @@ D: UDF filesystem
S: (ask for current address)
S: USA
+N: Larry Finger
+D: Maintainer of wireless drivers, too many to list here
+
N: Jürgen Fischer
D: Author of Adaptec AHA-152x SCSI driver
@@ -3146,9 +3150,11 @@ S: Triftstraße 55
S: 13353 Berlin
S: Germany
-N: Gustavo Pimental
+N: Gustavo Pimentel
D: PCI driver for Synopsys DesignWare
+D: Synopsys DesignWare eDMA driver
+D: Synopsys DesignWare xData traffic generator
N: Emanuel Pirker
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 831f19a32e08..cea8856f798d 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -21,6 +21,59 @@ Description:
device is offset from the internal allocation unit's
natural alignment.
+What: /sys/block/<disk>/atomic_write_max_bytes
+Date: February 2024
+Contact: Himanshu Madhani <[email protected]>
+Description:
+ [RO] This parameter specifies the maximum atomic write
+ size reported by the device. This parameter is relevant
+ for merging of writes, where a merged atomic write
+ operation must not exceed this number of bytes.
+ This parameter may be greater than the value in
+ atomic_write_unit_max_bytes as
+ atomic_write_unit_max_bytes will be rounded down to a
+ power-of-two and atomic_write_unit_max_bytes may also be
+ limited by some other queue limits, such as max_segments.
+ This parameter - along with atomic_write_unit_min_bytes
+ and atomic_write_unit_max_bytes - will not be larger than
+ max_hw_sectors_kb, but may be larger than max_sectors_kb.
+
+
+What: /sys/block/<disk>/atomic_write_unit_min_bytes
+Date: February 2024
+Contact: Himanshu Madhani <[email protected]>
+Description:
+ [RO] This parameter specifies the smallest block which can
+ be written atomically with an atomic write operation. All
+		atomic write operations must begin at an
+ atomic_write_unit_min boundary and must be multiples of
+ atomic_write_unit_min. This value must be a power-of-two.
+
+
+What: /sys/block/<disk>/atomic_write_unit_max_bytes
+Date: February 2024
+Contact: Himanshu Madhani <[email protected]>
+Description:
+ [RO] This parameter defines the largest block which can be
+ written atomically with an atomic write operation. This
+ value must be a multiple of atomic_write_unit_min and must
+ be a power-of-two. This value will not be larger than
+ atomic_write_max_bytes.
+
+
+What: /sys/block/<disk>/atomic_write_boundary_bytes
+Date: February 2024
+Contact: Himanshu Madhani <[email protected]>
+Description:
+ [RO] A device may need to internally split an atomic write I/O
+ which straddles a given logical block address boundary. This
+ parameter specifies the size in bytes of the atomic boundary if
+ one is reported by the device. This value must be a
+		power-of-two and at least the size of
+ atomic_write_unit_max_bytes.
+ Any attempt to merge atomic write I/Os must not result in a
+ merged I/O which crosses this boundary (if any).
+
What: /sys/block/<disk>/diskseq
Date: February 2021
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index aa8290a29dc8..fd4b56c0996f 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -723,40 +723,26 @@ Configuration pseudo-files:
======================= =======================================================
SecurityFlags Flags which control security negotiation and
also packet signing. Authentication (may/must)
- flags (e.g. for NTLM and/or NTLMv2) may be combined with
+ flags (e.g. for NTLMv2) may be combined with
the signing flags. Specifying two different password
hashing mechanisms (as "must use") on the other hand
does not make much sense. Default flags are::
- 0x07007
-
- (NTLM, NTLMv2 and packet signing allowed). The maximum
- allowable flags if you want to allow mounts to servers
- using weaker password hashes is 0x37037 (lanman,
- plaintext, ntlm, ntlmv2, signing allowed). Some
- SecurityFlags require the corresponding menuconfig
- options to be enabled. Enabling plaintext
- authentication currently requires also enabling
- lanman authentication in the security flags
- because the cifs module only supports sending
- laintext passwords using the older lanman dialect
- form of the session setup SMB. (e.g. for authentication
- using plain text passwords, set the SecurityFlags
- to 0x30030)::
+ 0x00C5
+
+ (NTLMv2 and packet signing allowed). Some SecurityFlags
+ may require enabling a corresponding menuconfig option.
may use packet signing 0x00001
must use packet signing 0x01001
- may use NTLM (most common password hash) 0x00002
- must use NTLM 0x02002
may use NTLMv2 0x00004
must use NTLMv2 0x04004
- may use Kerberos security 0x00008
- must use Kerberos 0x08008
- may use lanman (weak) password hash 0x00010
- must use lanman password hash 0x10010
- may use plaintext passwords 0x00020
- must use plaintext passwords 0x20020
- (reserved for future packet encryption) 0x00040
+ may use Kerberos security (krb5) 0x00008
+ must use Kerberos 0x08008
+ may use NTLMSSP 0x00080
+ must use NTLMSSP 0x80080
+ seal (packet encryption) 0x00040
+ must seal (not implemented yet) 0x40040
cifsFYI If set to non-zero value, additional debug information
will be logged to the system error log. This field
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b600df82669d..27ec49af1bf2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -788,25 +788,6 @@
Documentation/networking/netconsole.rst for an
alternative.
- <DEVNAME>:<n>.<n>[,options]
- Use the specified serial port on the serial core bus.
- The addressing uses DEVNAME of the physical serial port
- device, followed by the serial core controller instance,
- and the serial port instance. The options are the same
- as documented for the ttyS addressing above.
-
- The mapping of the serial ports to the tty instances
- can be viewed with:
-
- $ ls -d /sys/bus/serial-base/devices/*:*.*/tty/*
- /sys/bus/serial-base/devices/00:04:0.0/tty/ttyS0
-
- In the above example, the console can be addressed with
- console=00:04:0.0. Note that a console addressed this
- way will only get added when the related device driver
- is ready. The use of an earlycon parameter in addition to
- the console may be desired for console output early on.
-
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
uart[8250],mmio16,<addr>[,options]
@@ -2192,12 +2173,6 @@
Format: 0 | 1
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
- init_mlocked_on_free= [MM] Fill freed userspace memory with zeroes if
- it was mlock'ed and not explicitly munlock'ed
- afterwards.
- Format: 0 | 1
- Default set by CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON
-
init_pkru= [X86] Specify the default memory protection keys rights
register contents for all processes. 0x55555554 by
default (disallow access to all but pkey 0). Can
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
index 1c0ca06b6c97..8c48bcff3df9 100644
--- a/Documentation/arch/riscv/cmodx.rst
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -62,10 +62,10 @@ cmodx.c::
printf("Value before cmodx: %d\n", value);
// Call prctl before first fence.i is called inside modify_instruction
- prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
modify_instruction();
// Call prctl after final fence.i is called in process
- prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);
value = get_value();
printf("Value after cmodx: %d\n", value);
diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst
index 6a760c0eb192..99905e880a0e 100644
--- a/Documentation/block/data-integrity.rst
+++ b/Documentation/block/data-integrity.rst
@@ -153,18 +153,11 @@ bio_free() will automatically free the bip.
4.2 Block Device
----------------
-Because the format of the protection data is tied to the physical
-disk, each block device has been extended with a block integrity
-profile (struct blk_integrity). This optional profile is registered
-with the block layer using blk_integrity_register().
-
-The profile contains callback functions for generating and verifying
-the protection data, as well as getting and setting application tags.
-The profile also contains a few constants to aid in completing,
-merging and splitting the integrity metadata.
+Block devices can set up the integrity information in the integrity
+sub-structure of the queue_limits structure.
Layered block devices will need to pick a profile that's appropriate
-for all subdevices. blk_integrity_compare() can help with that. DM
+for all subdevices. queue_limits_stack_integrity() can help with that. DM
and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
will require extra work due to the application tag.
@@ -250,42 +243,6 @@ will require extra work due to the application tag.
integrity upon completion.
-5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata
---------------------------------------------------------------------------
-
- To enable integrity exchange on a block device the gendisk must be
- registered as capable:
-
- `int blk_integrity_register(gendisk, blk_integrity);`
-
- The blk_integrity struct is a template and should contain the
- following::
-
- static struct blk_integrity my_profile = {
- .name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
- .generate_fn = my_generate_fn,
- .verify_fn = my_verify_fn,
- .tuple_size = sizeof(struct my_tuple_size),
- .tag_size = <tag bytes per hw sector>,
- };
-
- 'name' is a text string which will be visible in sysfs. This is
- part of the userland API so chose it carefully and never change
- it. The format is standards body-type-variant.
- E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
-
- 'generate_fn' generates appropriate integrity metadata (for WRITE).
-
- 'verify_fn' verifies that the data buffer matches the integrity
- metadata.
-
- 'tuple_size' must be set to match the size of the integrity
- metadata per sector. I.e. 8 for DIF and EPP.
-
- 'tag_size' must be set to identify how many bytes of tag space
- are available per hardware sector. For DIF this is either 2 or
- 0 depending on the value of the Control Mode Page ATO bit.
-
----------------------------------------------------------------------
2007-12-24 Martin K. Petersen <[email protected]>
diff --git a/Documentation/block/writeback_cache_control.rst b/Documentation/block/writeback_cache_control.rst
index b208488d0aae..c3707d071780 100644
--- a/Documentation/block/writeback_cache_control.rst
+++ b/Documentation/block/writeback_cache_control.rst
@@ -46,41 +46,50 @@ worry if the underlying devices need any explicit cache flushing and how
the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags
may both be set on a single bio.
+Feature settings for block drivers
+----------------------------------
-Implementation details for bio based block drivers
---------------------------------------------------------------
+For devices that do not support volatile write caches there is no driver
+support required, the block layer completes empty REQ_PREFLUSH requests before
+entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
+requests that have a payload.
-These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
-directly below the submit_bio interface. For remapping drivers the REQ_FUA
-bits need to be propagated to underlying devices, and a global flush needs
-to be implemented for bios with the REQ_PREFLUSH bit set. For real device
-drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
-on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
-data can be completed successfully without doing any work. Drivers for
-devices with volatile caches need to implement the support for these
-flags themselves without any help from the block layer.
+For devices with volatile write caches the driver needs to tell the block layer
+that it supports flushing caches by setting the
+ BLK_FEAT_WRITE_CACHE
-Implementation details for request_fn based block drivers
----------------------------------------------------------
+flag in the queue_limits features field. For devices that also support the FUA
+bit the block layer needs to be told to pass on the REQ_FUA bit by also setting
+the
-For devices that do not support volatile write caches there is no driver
-support required, the block layer completes empty REQ_PREFLUSH requests before
-entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
-requests that have a payload. For devices with volatile write caches the
-driver needs to tell the block layer that it supports flushing caches by
-doing::
+ BLK_FEAT_FUA
+
+flag in the features field of the queue_limits structure.
+
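+As a sketch only (tag set setup and error handling are elided), a
+blk-mq driver might advertise both capabilities while setting up its
+queue limits::
+
+	struct queue_limits lim = { };
+
+	/* volatile write cache the driver can flush, plus native FUA */
+	lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
+	disk = blk_mq_alloc_disk(&tag_set, &lim, NULL);
+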
+Implementation details for bio based block drivers
+--------------------------------------------------
+
+For bio based drivers the REQ_PREFLUSH and REQ_FUA bits are simply passed on to
+the driver if the driver sets the BLK_FEAT_WRITE_CACHE flag and the driver
+needs to handle them.
+
+*NOTE*: The REQ_FUA bit also gets passed on when the BLK_FEAT_FUA flag is
+_not_ set. Any bio based driver that sets BLK_FEAT_WRITE_CACHE also needs to
+handle REQ_FUA.
- blk_queue_write_cache(sdkp->disk->queue, true, false);
+For remapping drivers the REQ_FUA bits need to be propagated to underlying
+devices, and a global flush needs to be implemented for bios with the
+REQ_PREFLUSH bit set.
-and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that
-REQ_PREFLUSH requests with a payload are automatically turned into a sequence
-of an empty REQ_OP_FLUSH request followed by the actual write by the block
-layer. For devices that also support the FUA bit the block layer needs
-to be told to pass through the REQ_FUA bit using::
+Implementation details for blk-mq drivers
+-----------------------------------------
- blk_queue_write_cache(sdkp->disk->queue, true, true);
+When the BLK_FEAT_WRITE_CACHE flag is set, REQ_OP_WRITE | REQ_PREFLUSH requests
+with a payload are automatically turned into a sequence of a REQ_OP_FLUSH
+request followed by the actual write by the block layer.
-and the driver must handle write requests that have the REQ_FUA bit set
-in prep_fn/request_fn. If the FUA bit is not natively supported the block
-layer turns it into an empty REQ_OP_FLUSH request after the actual write.
+When the BLK_FEAT_FUA flag is set, the REQ_FUA bit is simply passed on for the
+REQ_OP_WRITE request, else a REQ_OP_FLUSH request is sent by the block layer
+after the completion of the write request for bio submissions with the REQ_FUA
+bit set.
diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
index 07ccbda4a0ab..b9a9f2cf32a1 100644
--- a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
+++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
@@ -66,7 +66,6 @@ allOf:
compatible:
contains:
enum:
- - qcom,qdu1000-llcc
- qcom,sc7180-llcc
- qcom,sm6350-llcc
then:
@@ -104,6 +103,7 @@ allOf:
compatible:
contains:
enum:
+ - qcom,qdu1000-llcc
- qcom,sc8180x-llcc
- qcom,sc8280xp-llcc
- qcom,x1e80100-llcc
diff --git a/Documentation/devicetree/bindings/dma/fsl,edma.yaml b/Documentation/devicetree/bindings/dma/fsl,edma.yaml
index acfb4b2ee7a9..d54140f18d34 100644
--- a/Documentation/devicetree/bindings/dma/fsl,edma.yaml
+++ b/Documentation/devicetree/bindings/dma/fsl,edma.yaml
@@ -59,8 +59,8 @@ properties:
- 3
dma-channels:
- minItems: 1
- maxItems: 64
+ minimum: 1
+ maximum: 64
clocks:
minItems: 1
diff --git a/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml b/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml
index b1c13bab2472..b2d19cfb87ad 100644
--- a/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml
@@ -77,7 +77,7 @@ required:
- clocks
allOf:
- - $ref: i2c-controller.yaml
+ - $ref: /schemas/i2c/i2c-controller.yaml#
- if:
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
index ab151c9db219..580003cdfff5 100644
--- a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
+++ b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
@@ -21,7 +21,7 @@ description: |
google,cros-ec-spi or google,cros-ec-i2c.
allOf:
- - $ref: i2c-controller.yaml#
+ - $ref: /schemas/i2c/i2c-controller.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
index c80c880a9dab..60aaf30d68ed 100644
--- a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
@@ -128,7 +128,6 @@ required:
- cell-index
- reg
- fsl,fman-ports
- - ptp-timer
dependencies:
pcs-handle-names:
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml
index 50846a2d09c8..0bf2d9f093b5 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml
@@ -29,7 +29,6 @@ properties:
- qcom,pm7325-gpio
- qcom,pm7550ba-gpio
- qcom,pm8005-gpio
- - qcom,pm8008-gpio
- qcom,pm8018-gpio
- qcom,pm8019-gpio
- qcom,pm8038-gpio
@@ -126,7 +125,6 @@ allOf:
compatible:
contains:
enum:
- - qcom,pm8008-gpio
- qcom,pmi8950-gpio
- qcom,pmr735d-gpio
then:
@@ -448,7 +446,6 @@ $defs:
- gpio1-gpio10 for pm7325
- gpio1-gpio8 for pm7550ba
- gpio1-gpio4 for pm8005
- - gpio1-gpio2 for pm8008
- gpio1-gpio6 for pm8018
- gpio1-gpio12 for pm8038
- gpio1-gpio40 for pm8058
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index 5149ecdc53c7..d732c42526df 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -328,6 +328,12 @@ CXL Memory Device
.. kernel-doc:: drivers/cxl/mem.c
:doc: cxl mem
+.. kernel-doc:: drivers/cxl/cxlmem.h
+ :internal:
+
+.. kernel-doc:: drivers/cxl/core/memdev.c
+ :identifiers:
+
CXL Port
--------
.. kernel-doc:: drivers/cxl/port.c
@@ -341,6 +347,15 @@ CXL Core
.. kernel-doc:: drivers/cxl/cxl.h
:internal:
+.. kernel-doc:: drivers/cxl/core/hdm.c
+ :doc: cxl core hdm
+
+.. kernel-doc:: drivers/cxl/core/hdm.c
+ :identifiers:
+
+.. kernel-doc:: drivers/cxl/core/cdat.c
+ :identifiers:
+
.. kernel-doc:: drivers/cxl/core/port.c
:doc: cxl core
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 8f5c1ee02e2f..e8e496d23e1d 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -34,6 +34,7 @@ algorithms work.
seq_file
sharedsubtree
idmappings
+ iomap/index
automount-support
diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst
new file mode 100644
index 000000000000..f8ee3427bc1a
--- /dev/null
+++ b/Documentation/filesystems/iomap/design.rst
@@ -0,0 +1,441 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _iomap_design:
+
+..
+ Dumb style notes to maintain the author's sanity:
+ Please try to start sentences on separate lines so that
+ sentence changes don't bleed colors in diff.
+ Heading decorations are documented in sphinx.rst.
+
+==============
+Library Design
+==============
+
+.. contents:: Table of Contents
+ :local:
+
+Introduction
+============
+
+iomap is a filesystem library for handling common file operations.
+The library has two layers:
+
+ 1. A lower layer that provides an iterator over ranges of file offsets.
+    This layer tries to obtain mappings of each file range to storage
+ from the filesystem, but the storage information is not necessarily
+ required.
+
+ 2. An upper layer that acts upon the space mappings provided by the
+ lower layer iterator.
+
+The iteration can involve mappings of a file's logical offset ranges to
+physical extents, but the storage layer information is not necessarily
+required, e.g. for walking cached file information.
+The library exports various APIs for implementing file operations such
+as:
+
+ * Pagecache reads and writes
+ * Folio write faults to the pagecache
+ * Writeback of dirty folios
+ * Direct I/O reads and writes
+ * fsdax I/O reads, writes, loads, and stores
+ * FIEMAP
+ * lseek ``SEEK_DATA`` and ``SEEK_HOLE``
+ * swapfile activation
+
+This library originated in the file I/O path that XFS once used; it
+has since been extended to cover several other operations.
+
+Who Should Read This?
+=====================
+
+The target audience for this document are filesystem, storage, and
+pagecache programmers and code reviewers.
+
+If you are working on PCI, machine architectures, or device drivers, you
+are most likely in the wrong place.
+
+How Is This Better?
+===================
+
+Unlike the classic Linux I/O model which breaks file I/O into small
+units (generally memory pages or blocks) and looks up space mappings on
+the basis of that unit, the iomap model asks the filesystem for the
+largest space mappings that it can create for a given file operation and
+initiates operations on that basis.
+This strategy improves the filesystem's visibility into the size of the
+operation being performed, which enables it to combat fragmentation with
+larger space allocations when possible.
+Larger space mappings improve runtime performance by amortizing the cost
+of mapping function calls into the filesystem across a larger amount of
+data.
+
+At a high level, an iomap operation `looks like this
+<https://lore.kernel.org/all/[email protected]/>`_:
+
+1. For each byte in the operation range...
+
+ 1. Obtain a space mapping via ``->iomap_begin``
+
+ 2. For each sub-unit of work...
+
+ 1. Revalidate the mapping and go back to (1) above, if necessary.
+ So far only the pagecache operations need to do this.
+
+ 2. Do the work
+
+ 3. Increment operation cursor
+
+ 4. Release the mapping via ``->iomap_end``, if necessary
+
+Each iomap operation will be covered in more detail below.
+This library was covered previously by an `LWN article
+<https://lwn.net/Articles/935934/>`_ and a `KernelNewbies page
+<https://kernelnewbies.org/KernelProjects/iomap>`_.
+
+The goal of this document is to provide a brief discussion of the
+design and capabilities of iomap, followed by a more detailed catalog
+of the interfaces presented by iomap.
+If you change iomap, please update this design document.
+
+File Range Iterator
+===================
+
+Definitions
+-----------
+
+ * **buffer head**: Shattered remnants of the old buffer cache.
+
+ * ``fsblock``: The block size of a file, also known as ``i_blocksize``.
+
+ * ``i_rwsem``: The VFS ``struct inode`` rwsemaphore.
+ Processes hold this in shared mode to read file state and contents.
+ Some filesystems may allow shared mode for writes.
+ Processes often hold this in exclusive mode to change file state and
+ contents.
+
+ * ``invalidate_lock``: The pagecache ``struct address_space``
+ rwsemaphore that protects against folio insertion and removal for
+ filesystems that support punching out folios below EOF.
+ Processes wishing to insert folios must hold this lock in shared
+ mode to prevent removal, though concurrent insertion is allowed.
+ Processes wishing to remove folios must hold this lock in exclusive
+ mode to prevent insertions.
+ Concurrent removals are not allowed.
+
+ * ``dax_read_lock``: The RCU read lock that dax takes to prevent a
+ device pre-shutdown hook from returning before other threads have
+ released resources.
+
+ * **filesystem mapping lock**: This synchronization primitive is
+ internal to the filesystem and must protect the file mapping data
+ from updates while a mapping is being sampled.
+ The filesystem author must determine how this coordination should
+ happen; it does not need to be an actual lock.
+
+ * **iomap internal operation lock**: This is a general term for
+ synchronization primitives that iomap functions take while holding a
+ mapping.
+ A specific example would be taking the folio lock while reading or
+ writing the pagecache.
+
+ * **pure overwrite**: A write operation that does not require any
+ metadata or zeroing operations to perform during either submission
+ or completion.
+   This implies that the filesystem must have already allocated space
+ on disk as ``IOMAP_MAPPED`` and the filesystem must not place any
+   constraints on I/O alignment or size.
+ The only constraints on I/O alignment are device level (minimum I/O
+ size and alignment, typically sector size).
+
+``struct iomap``
+----------------
+
+The filesystem communicates to the iomap iterator the mapping of
+byte ranges of a file to byte ranges of a storage device with the
+structure below:
+
+.. code-block:: c
+
+ struct iomap {
+ u64 addr;
+ loff_t offset;
+ u64 length;
+ u16 type;
+ u16 flags;
+ struct block_device *bdev;
+ struct dax_device *dax_dev;
+		void *inline_data;
+ void *private;
+ const struct iomap_folio_ops *folio_ops;
+ u64 validity_cookie;
+ };
+
+The fields are as follows:
+
+ * ``offset`` and ``length`` describe the range of file offsets, in
+ bytes, covered by this mapping.
+ These fields must always be set by the filesystem.
+
+ * ``type`` describes the type of the space mapping:
+
+ * **IOMAP_HOLE**: No storage has been allocated.
+ This type must never be returned in response to an ``IOMAP_WRITE``
+ operation because writes must allocate and map space, and return
+ the mapping.
+ The ``addr`` field must be set to ``IOMAP_NULL_ADDR``.
+ iomap does not support writing (whether via pagecache or direct
+ I/O) to a hole.
+
+ * **IOMAP_DELALLOC**: A promise to allocate space at a later time
+ ("delayed allocation").
+ If the filesystem returns IOMAP_F_NEW here and the write fails, the
+ ``->iomap_end`` function must delete the reservation.
+ The ``addr`` field must be set to ``IOMAP_NULL_ADDR``.
+
+ * **IOMAP_MAPPED**: The file range maps to specific space on the
+ storage device.
+ The device is returned in ``bdev`` or ``dax_dev``.
+ The device address, in bytes, is returned via ``addr``.
+
+ * **IOMAP_UNWRITTEN**: The file range maps to specific space on the
+ storage device, but the space has not yet been initialized.
+ The device is returned in ``bdev`` or ``dax_dev``.
+ The device address, in bytes, is returned via ``addr``.
+ Reads from this type of mapping will return zeroes to the caller.
+ For a write or writeback operation, the ioend should update the
+ mapping to MAPPED.
+ Refer to the sections about ioends for more details.
+
+ * **IOMAP_INLINE**: The file range maps to the memory buffer
+ specified by ``inline_data``.
+ For write operation, the ``->iomap_end`` function presumably
+ handles persisting the data.
+ The ``addr`` field must be set to ``IOMAP_NULL_ADDR``.
+
+ * ``flags`` describe the status of the space mapping.
+ These flags should be set by the filesystem in ``->iomap_begin``:
+
+ * **IOMAP_F_NEW**: The space under the mapping is newly allocated.
+ Areas that will not be written to must be zeroed.
+ If a write fails and the mapping is a space reservation, the
+ reservation must be deleted.
+
+ * **IOMAP_F_DIRTY**: The inode will have uncommitted metadata needed
+ to access any data written.
+ fdatasync is required to commit these changes to persistent
+ storage.
+ This needs to take into account metadata changes that *may* be made
+ at I/O completion, such as file size updates from direct I/O.
+
+ * **IOMAP_F_SHARED**: The space under the mapping is shared.
+ Copy on write is necessary to avoid corrupting other file data.
+
+ * **IOMAP_F_BUFFER_HEAD**: This mapping requires the use of buffer
+ heads for pagecache operations.
+ Do not add more uses of this.
+
+ * **IOMAP_F_MERGED**: Multiple contiguous block mappings were
+ coalesced into this single mapping.
+ This is only useful for FIEMAP.
+
+ * **IOMAP_F_XATTR**: The mapping is for extended attribute data, not
+ regular file data.
+ This is only useful for FIEMAP.
+
+ * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
+ be set by the filesystem for its own purposes.
+
+ These flags can be set by iomap itself during file operations.
+ The filesystem should supply an ``->iomap_end`` function if it needs
+ to observe these flags:
+
+ * **IOMAP_F_SIZE_CHANGED**: The file size has changed as a result of
+ using this mapping.
+
+ * **IOMAP_F_STALE**: The mapping was found to be stale.
+ iomap will call ``->iomap_end`` on this mapping and then
+ ``->iomap_begin`` to obtain a new mapping.
+
+ Currently, these flags are only set by pagecache operations.
+
+ * ``addr`` describes the device address, in bytes.
+
+ * ``bdev`` describes the block device for this mapping.
+ This only needs to be set for mapped or unwritten operations.
+
+ * ``dax_dev`` describes the DAX device for this mapping.
+ This only needs to be set for mapped or unwritten operations, and
+ only for a fsdax operation.
+
+ * ``inline_data`` points to a memory buffer for I/O involving
+ ``IOMAP_INLINE`` mappings.
+ This value is ignored for all other mapping types.
+
+ * ``private`` is a pointer to `filesystem-private information
+ <https://lore.kernel.org/all/[email protected]/>`_.
+ This value will be passed unchanged to ``->iomap_end``.
+
+ * ``folio_ops`` will be covered in the section on pagecache operations.
+
+ * ``validity_cookie`` is a magic freshness value set by the filesystem
+ that should be used to detect stale mappings.
+ For pagecache operations this is critical for correct operation
+ because page faults can occur, which implies that filesystem locks
+ should not be held between ``->iomap_begin`` and ``->iomap_end``.
+ Filesystems with completely static mappings need not set this value.
+ Only pagecache operations revalidate mappings; see the section about
+ ``iomap_valid`` for details.
+
+``struct iomap_ops``
+--------------------
+
+Every iomap function requires the filesystem to pass an operations
+structure to obtain a mapping and (optionally) to release the mapping:
+
+.. code-block:: c
+
+ struct iomap_ops {
+ int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap,
+ struct iomap *srcmap);
+
+ int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned flags,
+ struct iomap *iomap);
+ };
+
+``->iomap_begin``
+~~~~~~~~~~~~~~~~~
+
+iomap operations call ``->iomap_begin`` to obtain one file mapping for
+the range of bytes specified by ``pos`` and ``length`` for the file
+``inode``.
+This mapping should be returned through the ``iomap`` pointer.
+The mapping must cover at least the first byte of the supplied file
+range, but it does not need to cover the entire requested range.
+
+Each iomap operation describes the requested operation through the
+``flags`` argument.
+The exact value of ``flags`` will be documented in the
+operation-specific sections below.
+These flags can, at least in principle, apply generally to iomap
+operations:
+
+ * ``IOMAP_DIRECT`` is set when the caller wishes to issue file I/O to
+ block storage.
+
+ * ``IOMAP_DAX`` is set when the caller wishes to issue file I/O to
+ memory-like storage.
+
+ * ``IOMAP_NOWAIT`` is set when the caller wishes to perform a best
+ effort attempt to avoid any operation that would result in blocking
+ the submitting task.
+ This is similar in intent to ``O_NONBLOCK`` for network APIs - it is
+ intended for asynchronous applications to keep doing other work
+ instead of waiting for the specific unavailable filesystem resource
+ to become available.
+ Filesystems implementing ``IOMAP_NOWAIT`` semantics need to use
+ trylock algorithms.
+ They need to be able to satisfy the entire I/O request range with a
+ single iomap mapping.
+ They need to avoid reading or writing metadata synchronously.
+ They need to avoid blocking memory allocations.
+ They need to avoid waiting on transaction reservations to allow
+ modifications to take place.
+ They probably should not be allocating new space.
+ And so on.
+ If there is any doubt in the filesystem developer's mind as to
+ whether any specific ``IOMAP_NOWAIT`` operation may end up blocking,
+ then they should return ``-EAGAIN`` as early as possible rather than
+ start the operation and force the submitting task to block.
+ ``IOMAP_NOWAIT`` is often set on behalf of ``IOCB_NOWAIT`` or
+ ``RWF_NOWAIT``.
+
+If it is necessary to read existing file contents from a `different
+<https://lore.kernel.org/all/[email protected]/>`_
+device or address range on a device, the filesystem should return that
+information via ``srcmap``.
+Only pagecache and fsdax operations support reading from one mapping and
+writing to another.
+
+``->iomap_end``
+~~~~~~~~~~~~~~~
+
+After the operation completes, the ``->iomap_end`` function, if present,
+is called to signal that iomap is finished with a mapping.
+Typically, implementations will use this function to tear down any
+context that was set up in ``->iomap_begin``.
+For example, a write might wish to commit the reservations for the bytes
+that were operated upon and unreserve any space that was not operated
+upon.
+``written`` might be zero if no bytes were touched.
+``flags`` will contain the same value passed to ``->iomap_begin``.
+iomap ops for reads are not likely to need to supply this function.
+
+Both functions should return a negative errno code on error, or zero on
+success.
+
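+As an illustration, a filesystem whose mappings are always allocated
+and never change might implement the pair as follows.
+This is a hypothetical sketch; ``myfs_lookup_extent`` and ``struct
+myfs_extent`` are stand-ins for filesystem internals:
+
+.. code-block:: c
+
+	static int myfs_iomap_begin(struct inode *inode, loff_t pos,
+			loff_t length, unsigned flags, struct iomap *iomap,
+			struct iomap *srcmap)
+	{
+		struct myfs_extent ext;
+		int error;
+
+		/* sample the mapping under the filesystem mapping lock */
+		error = myfs_lookup_extent(inode, pos, &ext);
+		if (error)
+			return error;
+
+		iomap->type = IOMAP_MAPPED;
+		iomap->offset = ext.file_offset;
+		iomap->length = ext.length;
+		iomap->addr = ext.disk_offset;	/* in bytes */
+		iomap->bdev = inode->i_sb->s_bdev;
+		return 0;
+	}
+
+	static const struct iomap_ops myfs_iomap_ops = {
+		.iomap_begin	= myfs_iomap_begin,
+		/* no ->iomap_end; nothing to release */
+	};
+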
+Preparing for File Operations
+=============================
+
+iomap only handles mapping and I/O.
+Filesystems must still call out to the VFS to check input parameters
+and file state before initiating an I/O operation.
+It does not handle obtaining filesystem freeze protection, updating of
+timestamps, stripping privileges, or access control.
+
+Locking Hierarchy
+=================
+
+iomap requires that filesystems supply their own locking model.
+There are three categories of synchronization primitives, as far as
+iomap is concerned:
+
+ * The **upper** level primitive is provided by the filesystem to
+ coordinate access to different iomap operations.
+   The exact primitive is specific to the filesystem and operation,
+ but is often a VFS inode, pagecache invalidation, or folio lock.
+ For example, a filesystem might take ``i_rwsem`` before calling
+ ``iomap_file_buffered_write`` and ``iomap_file_unshare`` to prevent
+ these two file operations from clobbering each other.
+ Pagecache writeback may lock a folio to prevent other threads from
+ accessing the folio until writeback is underway.
+
+ * The **lower** level primitive is taken by the filesystem in the
+ ``->iomap_begin`` and ``->iomap_end`` functions to coordinate
+ access to the file space mapping information.
+ The fields of the iomap object should be filled out while holding
+ this primitive.
+ The upper level synchronization primitive, if any, remains held
+ while acquiring the lower level synchronization primitive.
+ For example, XFS takes ``ILOCK_EXCL`` and ext4 takes ``i_data_sem``
+ while sampling mappings.
+ Filesystems with immutable mapping information may not require
+ synchronization here.
+
+ * The **operation** primitive is taken by an iomap operation to
+ coordinate access to its own internal data structures.
+ The upper level synchronization primitive, if any, remains held
+ while acquiring this primitive.
+ The lower level primitive is not held while acquiring this
+ primitive.
+ For example, pagecache write operations will obtain a file mapping,
+ then grab and lock a folio to copy new contents.
+ It may also lock an internal folio state object to update metadata.
+
+The exact locking requirements are specific to the filesystem; for
+certain operations, some of these locks can be elided.
+All further mentions of locking are *recommendations*, not mandates.
+Each filesystem author must figure out the locking for themselves.
+
+Bugs and Limitations
+====================
+
+ * No support for fscrypt.
+ * No support for compression.
+ * No support for fsverity yet.
+ * Strong assumptions that IO should work the way it does on XFS.
+ * Does iomap *actually* work for non-regular file data?
+
+Patches welcome!
diff --git a/Documentation/filesystems/iomap/index.rst b/Documentation/filesystems/iomap/index.rst
new file mode 100644
index 000000000000..3c6a52440250
--- /dev/null
+++ b/Documentation/filesystems/iomap/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+VFS iomap Documentation
+=======================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ design
+ operations
+ porting
diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst
new file mode 100644
index 000000000000..8e6c721d2330
--- /dev/null
+++ b/Documentation/filesystems/iomap/operations.rst
@@ -0,0 +1,713 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _iomap_operations:
+
+..
+ Dumb style notes to maintain the author's sanity:
+ Please try to start sentences on separate lines so that
+ sentence changes don't bleed colors in diff.
+ Heading decorations are documented in sphinx.rst.
+
+=========================
+Supported File Operations
+=========================
+
+.. contents:: Table of Contents
+ :local:
+
+Below is a discussion of the high-level file operations that iomap
+implements.
+
+Buffered I/O
+============
+
+Buffered I/O is the default file I/O path in Linux.
+File contents are cached in memory ("pagecache") to satisfy reads and
+writes.
+Dirty cache will be written back to disk at some point that can be
+forced via ``fsync`` and variants.
+
+iomap implements nearly all the folio and pagecache management that
+filesystems have to implement themselves under the legacy I/O model.
+This means that the filesystem need not know the details of allocating,
+mapping, managing uptodate and dirty state, or writeback of pagecache
+folios.
+Under the legacy I/O model, this was managed very inefficiently with
+linked lists of buffer heads instead of the per-folio bitmaps that iomap
+uses.
+Unless the filesystem explicitly opts in to buffer heads, they will not
+be used, which makes buffered I/O much more efficient, and the pagecache
+maintainer much happier.
+
+``struct address_space_operations``
+-----------------------------------
+
+The following iomap functions can be referenced directly from the
+address space operations structure:
+
+ * ``iomap_dirty_folio``
+ * ``iomap_release_folio``
+ * ``iomap_invalidate_folio``
+ * ``iomap_is_partially_uptodate``
+
+The following address space operations can be wrapped easily:
+
+ * ``read_folio``
+ * ``readahead``
+ * ``writepages``
+ * ``bmap``
+ * ``swap_activate``
+
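+As a sketch, the wiring for the read side might look like this, with
+``myfs_iomap_ops`` standing in for the filesystem's ``struct
+iomap_ops``:
+
+.. code-block:: c
+
+	static int myfs_read_folio(struct file *file, struct folio *folio)
+	{
+		return iomap_read_folio(folio, &myfs_iomap_ops);
+	}
+
+	static void myfs_readahead(struct readahead_control *rac)
+	{
+		iomap_readahead(rac, &myfs_iomap_ops);
+	}
+
+	static const struct address_space_operations myfs_aops = {
+		.read_folio		= myfs_read_folio,
+		.readahead		= myfs_readahead,
+		.dirty_folio		= iomap_dirty_folio,
+		.release_folio		= iomap_release_folio,
+		.invalidate_folio	= iomap_invalidate_folio,
+		.is_partially_uptodate	= iomap_is_partially_uptodate,
+	};
+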
+``struct iomap_folio_ops``
+--------------------------
+
+The ``->iomap_begin`` function for pagecache operations may set the
+``struct iomap::folio_ops`` field to an ops structure to override
+default behaviors of iomap:
+
+.. code-block:: c
+
+ struct iomap_folio_ops {
+ struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
+ unsigned len);
+ void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
+ struct folio *folio);
+ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
+ };
+
+iomap calls these functions:
+
+ - ``get_folio``: Called to allocate and return an active reference to
+ a locked folio prior to starting a write.
+ If this function is not provided, iomap will call
+ ``iomap_get_folio``.
+ This could be used to `set up per-folio filesystem state
+ <https://lore.kernel.org/all/[email protected]/>`_
+ for a write.
+
+ - ``put_folio``: Called to unlock and put a folio after a pagecache
+ operation completes.
+ If this function is not provided, iomap will ``folio_unlock`` and
+ ``folio_put`` on its own.
+ This could be used to `commit per-folio filesystem state
+ <https://lore.kernel.org/all/[email protected]/>`_
+ that was set up by ``->get_folio``.
+
+ - ``iomap_valid``: The filesystem may not hold locks between
+ ``->iomap_begin`` and ``->iomap_end`` because pagecache operations
+ can take folio locks, fault on userspace pages, initiate writeback
+ for memory reclamation, or engage in other time-consuming actions.
+ If a file's space mapping data are mutable, it is possible that the
+ mapping for a particular pagecache folio can `change in the time it
+ takes
+ <https://lore.kernel.org/all/[email protected]/>`_
+ to allocate, install, and lock that folio.
+
+ For the pagecache, races can happen if writeback doesn't take
+ ``i_rwsem`` or ``invalidate_lock`` and updates mapping information.
+   Races can also happen if the filesystem allows concurrent writes.
+ For such files, the mapping *must* be revalidated after the folio
+ lock has been taken so that iomap can manage the folio correctly.
+
+ fsdax does not need this revalidation because there's no writeback
+ and no support for unwritten extents.
+
+ Filesystems subject to this kind of race must provide a
+ ``->iomap_valid`` function to decide if the mapping is still valid.
+ If the mapping is not valid, the mapping will be sampled again.
+
+ To support making the validity decision, the filesystem's
+ ``->iomap_begin`` function may set ``struct iomap::validity_cookie``
+ at the same time that it populates the other iomap fields.
+ A simple validation cookie implementation is a sequence counter.
+ If the filesystem bumps the sequence counter every time it modifies
+ the inode's extent map, it can be placed in the ``struct
+ iomap::validity_cookie`` during ``->iomap_begin``.
+   If the value in the cookie is found to be different from the value
+   the filesystem holds when the mapping is passed back to
+   ``->iomap_valid``, then the iomap should be considered stale and the
+   validation fails.
+
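+As a sketch of the sequence counter scheme described above, with
+``MYFS_I`` and ``map_seq`` as hypothetical filesystem internals:
+
+.. code-block:: c
+
+	static bool myfs_iomap_valid(struct inode *inode,
+			const struct iomap *iomap)
+	{
+		/* every extent map update bumps map_seq */
+		return iomap->validity_cookie ==
+			READ_ONCE(MYFS_I(inode)->map_seq);
+	}
+
+	static const struct iomap_folio_ops myfs_folio_ops = {
+		.iomap_valid	= myfs_iomap_valid,
+	};
+
+``->iomap_begin`` would sample the counter into
+``iomap->validity_cookie`` and point ``iomap->folio_ops`` at
+``&myfs_folio_ops`` while filling out the rest of the mapping.
+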
+These ``struct kiocb`` flags are significant for buffered I/O with iomap:
+
+ * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.
+
+Internal per-Folio State
+------------------------
+
+If the fsblock size matches the size of a pagecache folio, it is assumed
+that all disk I/O operations will operate on the entire folio.
+The uptodate (memory contents are at least as new as what's on disk) and
+dirty (memory contents are newer than what's on disk) status of the
+folio are all that's needed for this case.
+
+If the fsblock size is less than the size of a pagecache folio, iomap
+tracks the per-fsblock uptodate and dirty state itself.
+This enables iomap to handle both "bs < ps" `filesystems
+<https://lore.kernel.org/all/[email protected]/>`_
+and large folios in the pagecache.
+
+iomap internally tracks two state bits per fsblock:
+
+ * ``uptodate``: iomap will try to keep folios fully up to date.
+ If there are read(ahead) errors, those fsblocks will not be marked
+ uptodate.
+ The folio itself will be marked uptodate when all fsblocks within the
+ folio are uptodate.
+
+ * ``dirty``: iomap will set the per-block dirty state when programs
+ write to the file.
+ The folio itself will be marked dirty when any fsblock within the
+ folio is dirty.
+
+iomap also tracks the amount of read and write disk IOs that are in
+flight.
+This structure is much lighter weight than ``struct buffer_head``
+because there is only one per folio, and the per-fsblock overhead is two
+bits vs. 104 bytes.
+
+Filesystems wishing to turn on large folios in the pagecache should call
+``mapping_set_large_folios`` when initializing the incore inode.
+
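+For example, from the routine that initializes a freshly-instantiated
+inode:
+
+.. code-block:: c
+
+	/* opt this file's pagecache into large folios */
+	mapping_set_large_folios(inode->i_mapping);
+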
+Buffered Readahead and Reads
+----------------------------
+
+The ``iomap_readahead`` function initiates readahead to the pagecache.
+The ``iomap_read_folio`` function reads one folio's worth of data into
+the pagecache.
+The ``flags`` argument to ``->iomap_begin`` will be set to zero.
+The pagecache takes whatever locks it needs before calling the
+filesystem.
+
+Buffered Writes
+---------------
+
+The ``iomap_file_buffered_write`` function writes an ``iocb`` to the
+pagecache.
+``IOMAP_WRITE`` or ``IOMAP_WRITE`` | ``IOMAP_NOWAIT`` will be passed as
+the ``flags`` argument to ``->iomap_begin``.
+Callers commonly take ``i_rwsem`` in either shared or exclusive mode
+before calling this function.
+
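+A typical caller might be sketched like this; the generic VFS checks
+are shown, and everything else (timestamp updates, ``IOCB_NOWAIT``
+handling, and so on) is elided:
+
+.. code-block:: c
+
+	static ssize_t myfs_buffered_write_iter(struct kiocb *iocb,
+			struct iov_iter *from)
+	{
+		struct inode *inode = file_inode(iocb->ki_filp);
+		ssize_t ret;
+
+		inode_lock(inode);
+		ret = generic_write_checks(iocb, from);
+		if (ret > 0)
+			ret = iomap_file_buffered_write(iocb, from,
+					&myfs_iomap_ops);
+		inode_unlock(inode);
+		return ret;
+	}
+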
+mmap Write Faults
+~~~~~~~~~~~~~~~~~
+
+The ``iomap_page_mkwrite`` function handles a write fault to a folio in
+the pagecache.
+``IOMAP_WRITE | IOMAP_FAULT`` will be passed as the ``flags`` argument
+to ``->iomap_begin``.
+Callers commonly take the mmap ``invalidate_lock`` in shared or
+exclusive mode before calling this function.
+
+Buffered Write Failures
+~~~~~~~~~~~~~~~~~~~~~~~
+
+After a short write to the pagecache, the areas not written will not
+become marked dirty.
+The filesystem must arrange to `cancel
+<https://lore.kernel.org/all/[email protected]/>`_
+such `reservations
+<https://lore.kernel.org/linux-xfs/[email protected]/>`_
+because writeback will not consume the reservation.
+The ``iomap_file_buffered_write_punch_delalloc`` function can be called from a
+``->iomap_end`` function to find all the clean areas of the folios
+caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
+It takes the ``invalidate_lock``.
+
+The filesystem must supply a function ``punch`` to be called for
+each file range in this state.
+This function must *only* remove delayed allocation reservations, in
+case another thread racing with the current thread writes successfully
+to the same region and triggers writeback to flush the dirty data out to
+disk.
+
+Zeroing for File Operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Filesystems can call ``iomap_zero_range`` to perform zeroing of the
+pagecache for non-truncation file operations that are not aligned to
+the fsblock size.
+``IOMAP_ZERO`` will be passed as the ``flags`` argument to
+``->iomap_begin``.
+Callers typically hold ``i_rwsem`` and ``invalidate_lock`` in exclusive
+mode before calling this function.
+
+Unsharing Reflinked File Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Filesystems can call ``iomap_file_unshare`` to force a file sharing
+storage with another file to preemptively copy the shared data to newly
+allocated storage.
+``IOMAP_WRITE | IOMAP_UNSHARE`` will be passed as the ``flags`` argument
+to ``->iomap_begin``.
+Callers typically hold ``i_rwsem`` and ``invalidate_lock`` in exclusive
+mode before calling this function.
+
+Truncation
+----------
+
+Filesystems can call ``iomap_truncate_page`` to zero the bytes in the
+pagecache from EOF to the end of the fsblock during a file truncation
+operation.
+``truncate_setsize`` or ``truncate_pagecache`` will take care of
+everything after the EOF block.
+``IOMAP_ZERO`` will be passed as the ``flags`` argument to
+``->iomap_begin``.
+Callers typically hold ``i_rwsem`` and ``invalidate_lock`` in exclusive
+mode before calling this function.
+
+Pagecache Writeback
+-------------------
+
+Filesystems can call ``iomap_writepages`` to respond to a request to
+write dirty pagecache folios to disk.
+The ``mapping`` and ``wbc`` parameters should be passed unchanged.
+The ``wpc`` pointer should be allocated by the filesystem and must
+be initialized to zero.
+
+The pagecache will lock each folio before trying to schedule it for
+writeback.
+It does not lock ``i_rwsem`` or ``invalidate_lock``.
+
+The dirty bit will be cleared for all folios run through the
+``->map_blocks`` machinery described below even if the writeback fails.
+This is to prevent dirty folio clots when storage devices fail; an
+``-EIO`` is recorded for userspace to collect via ``fsync``.
+
+The ``ops`` structure must be specified and is as follows:
+
+``struct iomap_writeback_ops``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ struct iomap_writeback_ops {
+ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
+ loff_t offset, unsigned len);
+ int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
+ void (*discard_folio)(struct folio *folio, loff_t pos);
+ };
+
+The fields are as follows:
+
+ - ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file
+ range (in bytes) given by ``offset`` and ``len``.
+ iomap calls this function for each dirty fs block in each dirty folio,
+ though it will `reuse mappings
+ <https://lore.kernel.org/all/[email protected]/>`_
+ for runs of contiguous dirty fsblocks within a folio.
+ Do not return ``IOMAP_INLINE`` mappings here; the ``->iomap_end``
+ function must deal with persisting written data.
+ Do not return ``IOMAP_DELALLOC`` mappings here; iomap currently
+ requires mapping to allocated space.
+ Filesystems can skip a potentially expensive mapping lookup if the
+ mappings have not changed.
+ This revalidation must be open-coded by the filesystem; it is
+ unclear if ``iomap::validity_cookie`` can be reused for this
+ purpose.
+ This function must be supplied by the filesystem.
+
+ - ``prepare_ioend``: Enables filesystems to transform the writeback
+ ioend or perform any other preparatory work before the writeback I/O
+ is submitted.
+ This might include pre-write space accounting updates, or installing
+ a custom ``->bi_end_io`` function for internal purposes, such as
+ deferring the ioend completion to a workqueue to run metadata update
+ transactions from process context.
+ This function is optional.
+
+ - ``discard_folio``: iomap calls this function after ``->map_blocks``
+ fails to schedule I/O for any part of a dirty folio.
+ The function should throw away any reservations that may have been
+ made for the write.
+ The folio will be marked clean and an ``-EIO`` recorded in the
+ pagecache.
+ Filesystems can use this callback to `remove
+ <https://lore.kernel.org/all/[email protected]/>`_
+ delalloc reservations to avoid having delalloc reservations for
+ clean pagecache.
+ This function is optional.
+
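+Putting the pieces together, ``->writepages`` might be sketched as
+follows, with ``myfs_map_blocks`` as a hypothetical ``->map_blocks``
+implementation:
+
+.. code-block:: c
+
+	static const struct iomap_writeback_ops myfs_writeback_ops = {
+		.map_blocks	= myfs_map_blocks,
+	};
+
+	static int myfs_writepages(struct address_space *mapping,
+			struct writeback_control *wbc)
+	{
+		struct iomap_writepage_ctx wpc = { };	/* must start zeroed */
+
+		return iomap_writepages(mapping, wbc, &wpc,
+				&myfs_writeback_ops);
+	}
+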
+Pagecache Writeback Completion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To handle the bookkeeping that must happen after disk I/O for writeback
+completes, iomap creates chains of ``struct iomap_ioend`` objects that
+wrap the ``bio`` that is used to write pagecache data to disk.
+By default, iomap finishes writeback ioends by clearing the writeback
+bit on the folios attached to the ``ioend``.
+If the write failed, it will also set the error bits on the folios and
+the address space.
+This can happen in interrupt or process context, depending on the
+storage device.
+
+Filesystems that need to update internal bookkeeping (e.g. unwritten
+extent conversions) should provide a ``->prepare_ioend`` function to
+set ``struct iomap_ioend::bio::bi_end_io`` to its own function.
+This function should call ``iomap_finish_ioends`` after finishing its
+own work (e.g. unwritten extent conversion).
+
+Some filesystems may wish to `amortize the cost of running metadata
+transactions
+<https://lore.kernel.org/all/[email protected]/>`_
+for post-writeback updates by batching them.
+They may also require transactions to run from process context, which
+implies punting batches to a workqueue.
+iomap ioends contain a ``list_head`` to enable batching.
+
+Given a batch of ioends, iomap has a few helpers to assist with
+amortization:
+
+ * ``iomap_sort_ioends``: Sort all the ioends in the list by file
+ offset.
+
+ * ``iomap_ioend_try_merge``: Given an ioend that is not in any list and
+ a separate list of sorted ioends, merge as many of the ioends from
+ the head of the list into the given ioend.
+ ioends can only be merged if the file range and storage addresses are
+ contiguous; the unwritten and shared status are the same; and the
+ write I/O outcome is the same.
+ The merged ioends become their own list.
+
+ * ``iomap_finish_ioends``: Finish an ioend that possibly has other
+ ioends linked to it.
+
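+A workqueue-based batch completion might, as a sketch, look like this;
+``myfs_convert_unwritten`` is hypothetical and error propagation is
+elided:
+
+.. code-block:: c
+
+	/* runs from process context, e.g. a workqueue */
+	static void myfs_finish_ioends(struct list_head *pending)
+	{
+		struct iomap_ioend *ioend;
+
+		iomap_sort_ioends(pending);
+		while ((ioend = list_first_entry_or_null(pending,
+				struct iomap_ioend, io_list))) {
+			list_del_init(&ioend->io_list);
+			iomap_ioend_try_merge(ioend, pending);
+			myfs_convert_unwritten(ioend);	/* metadata update */
+			iomap_finish_ioends(ioend, 0);
+		}
+	}
+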
+Direct I/O
+==========
+
+In Linux, direct I/O is defined as file I/O that is issued directly to
+storage, bypassing the pagecache.
+The ``iomap_dio_rw`` function implements O_DIRECT (direct I/O) reads and
+writes for files.
+
+.. code-block:: c
+
+ ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ const struct iomap_ops *ops,
+ const struct iomap_dio_ops *dops,
+ unsigned int dio_flags, void *private,
+ size_t done_before);
+
+The filesystem can provide the ``dops`` parameter if it needs to perform
+extra work before or after the I/O is issued to storage.
+The ``done_before`` parameter tells iomap how much of the request has
+already been transferred.
+It is used to continue a request asynchronously when `part of the
+request
+<https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c03098d4b9ad76bca2966a8769dcfe59f7f85103>`_
+has already been completed synchronously.
+
+The ``done_before`` parameter should be set if writes for the ``iocb``
+have been initiated prior to the call.
+The direction of the I/O is determined from the ``iocb`` passed in.
+
+The ``dio_flags`` argument can be set to any combination of the
+following values:
+
+ * ``IOMAP_DIO_FORCE_WAIT``: Wait for the I/O to complete even if the
+ kiocb is not synchronous.
+
+ * ``IOMAP_DIO_OVERWRITE_ONLY``: Perform a pure overwrite for this range
+ or fail with ``-EAGAIN``.
+ This can be used by filesystems with complex unaligned I/O
+ write paths to provide an optimised fast path for unaligned writes.
+ If a pure overwrite can be performed, then serialisation against
+ other I/Os to the same filesystem block(s) is unnecessary as there is
+ no risk of stale data exposure or data loss.
+ If a pure overwrite cannot be performed, then the filesystem can
+ perform the serialisation steps needed to provide exclusive access
+ to the unaligned I/O range so that it can perform allocation and
+ sub-block zeroing safely.
+ Filesystems can use this flag to try to reduce locking contention,
+ but a lot of `detailed checking
+ <https://lore.kernel.org/linux-ext4/[email protected]/>`_
+ is required to do it `correctly
+ <https://lore.kernel.org/linux-ext4/[email protected]/>`_.
+
+ * ``IOMAP_DIO_PARTIAL``: If a page fault occurs, return whatever
+ progress has already been made.
+ The caller may deal with the page fault and retry the operation.
+ If the caller decides to retry the operation, it should pass the
+ accumulated return values of all previous calls as the
+ ``done_before`` parameter to the next call.
+
+These ``struct kiocb`` flags are significant for direct I/O with iomap:
+
+ * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.
+
+ * ``IOCB_SYNC``: Ensure that the device has persisted data to disk
+ before completing the call.
+ In the case of pure overwrites, the I/O may be issued with FUA
+ enabled.
+
+ * ``IOCB_HIPRI``: Poll for I/O completion instead of waiting for an
+ interrupt.
+ Only meaningful for asynchronous I/O, and only if the entire I/O can
+ be issued as a single ``struct bio``.
+
+ * ``IOCB_DIO_CALLER_COMP``: Try to run I/O completion from the caller's
+ process context.
+ See ``linux/fs.h`` for more details.
+
+Filesystems should call ``iomap_dio_rw`` from ``->read_iter`` and
+``->write_iter``, and set ``FMODE_CAN_ODIRECT`` in the ``->open``
+function for the file.
+They should not set ``->direct_IO``, which is deprecated.
+
+If a filesystem wishes to perform its own work before direct I/O
+completion, it should call ``__iomap_dio_rw``.
+If its return value is not an error pointer or a NULL pointer, the
+filesystem should pass the return value to ``iomap_dio_complete`` after
+finishing its internal work.
+
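+As a sketch, a simple direct read path might look like:
+
+.. code-block:: c
+
+	static ssize_t myfs_dio_read_iter(struct kiocb *iocb,
+			struct iov_iter *to)
+	{
+		struct inode *inode = file_inode(iocb->ki_filp);
+		ssize_t ret;
+
+		inode_lock_shared(inode);
+		ret = iomap_dio_rw(iocb, to, &myfs_iomap_ops, NULL,
+				0, NULL, 0);
+		inode_unlock_shared(inode);
+		return ret;
+	}
+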
+Return Values
+-------------
+
+``iomap_dio_rw`` can return one of the following:
+
+ * A non-negative number of bytes transferred.
+
+ * ``-ENOTBLK``: Fall back to buffered I/O.
+ iomap itself will return this value if it cannot invalidate the page
+ cache before issuing the I/O to storage.
+ The ``->iomap_begin`` or ``->iomap_end`` functions may also return
+ this value.
+
+ * ``-EIOCBQUEUED``: The asynchronous direct I/O request has been
+ queued and will be completed separately.
+
+ * Any of the other negative error codes.
+
+Direct Reads
+------------
+
+A direct I/O read initiates a read I/O from the storage device to the
+caller's buffer.
+Dirty parts of the pagecache are flushed to storage before initiating
+the read I/O.
+The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DIRECT`` with
+any combination of the following enhancements:
+
+ * ``IOMAP_NOWAIT``, as defined previously.
+
+Callers commonly hold ``i_rwsem`` in shared mode before calling this
+function.
+
+Direct Writes
+-------------
+
+A direct I/O write initiates a write I/O to the storage device from the
+caller's buffer.
+Dirty parts of the pagecache are flushed to storage before initiating
+the write I/O.
+The pagecache is invalidated both before and after the write I/O.
+The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DIRECT |
+IOMAP_WRITE`` with any combination of the following enhancements:
+
+ * ``IOMAP_NOWAIT``, as defined previously.
+
+ * ``IOMAP_OVERWRITE_ONLY``: Allocating blocks and zeroing partial
+ blocks is not allowed.
+ The entire file range must map to a single written or unwritten
+ extent.
+ The file I/O range must be aligned to the filesystem block size
+ if the mapping is unwritten and the filesystem cannot handle zeroing
+ the unaligned regions without exposing stale contents.
+
+Callers commonly hold ``i_rwsem`` in shared or exclusive mode before
+calling this function.
+
+``struct iomap_dio_ops``
+-------------------------
+
+.. code-block:: c
+
+ struct iomap_dio_ops {
+ void (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
+ loff_t file_offset);
+ int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
+ unsigned flags);
+ struct bio_set *bio_set;
+ };
+
+The fields of this structure are as follows:
+
+ - ``submit_io``: iomap calls this function when it has constructed a
+ ``struct bio`` object for the I/O requested, and wishes to submit it
+ to the block device.
+ If no function is provided, ``submit_bio`` will be called directly.
+ Filesystems that would like to perform additional work before
+ submission (e.g. data replication for btrfs) should implement this
+ function.
+
+ - ``end_io``: This is called after the ``struct bio`` completes.
+ This function should perform post-write conversions of unwritten
+ extent mappings, handle write failures, etc.
+ The ``flags`` argument may be set to a combination of the following:
+
+ * ``IOMAP_DIO_UNWRITTEN``: The mapping was unwritten, so the ioend
+ should mark the extent as written.
+
+ * ``IOMAP_DIO_COW``: Writing to the space in the mapping required a
+ copy on write operation, so the ioend should switch mappings.
+
+ - ``bio_set``: This allows the filesystem to provide a custom bio_set
+ for allocating direct I/O bios.
+ This enables filesystems to `stash additional per-bio information
+ <https://lore.kernel.org/all/[email protected]/>`_
+ for private use.
+ If this field is NULL, generic ``struct bio`` objects will be used.
+
+Filesystems that want to perform extra work after an I/O completion
+should set a custom ``->bi_end_io`` function via ``->submit_io``.
+Afterwards, the custom endio function must call
+``iomap_dio_bio_end_io`` to finish the direct I/O.
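+
+For illustration, an ``->end_io`` handler that converts unwritten
+extents after a successful write might look like the sketch below;
+``myfs_convert_unwritten`` is a hypothetical filesystem helper:
+
+.. code-block:: c
+
+ static int myfs_dio_end_io(struct kiocb *iocb, ssize_t size, int error,
+                            unsigned flags)
+ {
+         struct inode *inode = file_inode(iocb->ki_filp);
+
+         if (error)
+                 return error;
+         if (flags & IOMAP_DIO_UNWRITTEN)
+                 return myfs_convert_unwritten(inode, iocb->ki_pos, size);
+         return 0;
+ }
+
+ static const struct iomap_dio_ops myfs_dio_ops = {
+         .end_io = myfs_dio_end_io,
+ };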
+
+DAX I/O
+=======
+
+Some storage devices can be directly mapped as memory.
+These devices support a new access mode known as "fsdax" that allows
+loads and stores through the CPU and memory controller.
+
+fsdax Reads
+-----------
+
+An fsdax read performs a memcpy from the storage device to the caller's
+buffer.
+The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DAX`` with any
+combination of the following enhancements:
+
+ * ``IOMAP_NOWAIT``, as defined previously.
+
+Callers commonly hold ``i_rwsem`` in shared mode before calling this
+function.
+
+fsdax Writes
+------------
+
+An fsdax write initiates a memcpy to the storage device from the
+caller's buffer.
+The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DAX |
+IOMAP_WRITE`` with any combination of the following enhancements:
+
+ * ``IOMAP_NOWAIT``, as defined previously.
+
+ * ``IOMAP_OVERWRITE_ONLY``: The caller requires a pure overwrite to be
+ performed from this mapping.
+ This requires the filesystem extent mapping to already exist as an
+ ``IOMAP_MAPPED`` type and span the entire range of the write I/O
+ request.
+ If the filesystem cannot map this request in a way that allows the
+ iomap infrastructure to perform a pure overwrite, it must fail the
+ mapping operation with ``-EAGAIN``.
+
+Callers commonly hold ``i_rwsem`` in exclusive mode before calling this
+function.
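+
+The usual entry point for both fsdax reads and writes is
+``dax_iomap_rw``; a hedged sketch of a write path, with a hypothetical
+``myfs_dax_write_iomap_ops`` and locking elided:
+
+.. code-block:: c
+
+ static ssize_t myfs_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ {
+         /* The caller is assumed to hold i_rwsem exclusively. */
+         return dax_iomap_rw(iocb, from, &myfs_dax_write_iomap_ops);
+ }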
+
+fsdax mmap Faults
+~~~~~~~~~~~~~~~~~
+
+The ``dax_iomap_fault`` function handles read and write faults to fsdax
+storage.
+For a read fault, ``IOMAP_DAX | IOMAP_FAULT`` will be passed as the
+``flags`` argument to ``->iomap_begin``.
+For a write fault, ``IOMAP_DAX | IOMAP_FAULT | IOMAP_WRITE`` will be
+passed as the ``flags`` argument to ``->iomap_begin``.
+
+Callers commonly hold the same locks as they do to call their iomap
+pagecache counterparts.
+
+fsdax Truncation, fallocate, and Unsharing
+------------------------------------------
+
+For fsdax files, the following functions are provided to replace their
+iomap pagecache I/O counterparts.
+The ``flags`` argument to ``->iomap_begin`` is the same as for the
+pagecache counterparts, with ``IOMAP_DAX`` added.
+
+ * ``dax_file_unshare``
+ * ``dax_zero_range``
+ * ``dax_truncate_page``
+
+Callers commonly hold the same locks as they do to call their iomap
+pagecache counterparts.
+
+fsdax Deduplication
+-------------------
+
+Filesystems implementing the ``FIDEDUPERANGE`` ioctl must call the
+``dax_remap_file_range_prep`` function with their own iomap read ops.
+
+Seeking Files
+=============
+
+iomap implements the two iterating whence modes of the ``llseek`` system
+call.
+
+SEEK_DATA
+---------
+
+The ``iomap_seek_data`` function implements the SEEK_DATA "whence" value
+for llseek.
+``IOMAP_REPORT`` will be passed as the ``flags`` argument to
+``->iomap_begin``.
+
+For unwritten mappings, the pagecache will be searched.
+Regions of the pagecache with a folio mapped and uptodate fsblocks
+within those folios will be reported as data areas.
+
+Callers commonly hold ``i_rwsem`` in shared mode before calling this
+function.
+
+SEEK_HOLE
+---------
+
+The ``iomap_seek_hole`` function implements the SEEK_HOLE "whence" value
+for llseek.
+``IOMAP_REPORT`` will be passed as the ``flags`` argument to
+``->iomap_begin``.
+
+For unwritten mappings, the pagecache will be searched.
+Regions of the pagecache with no folio mapped, or a !uptodate fsblock
+within a folio, will be reported as sparse hole areas.
+
+Callers commonly hold ``i_rwsem`` in shared mode before calling this
+function.
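+
+Both whence modes are typically wired into a filesystem's ``->llseek``
+method; a sketch with a hypothetical ``myfs_read_iomap_ops`` and
+locking elided:
+
+.. code-block:: c
+
+ static loff_t myfs_llseek(struct file *file, loff_t offset, int whence)
+ {
+         struct inode *inode = file->f_mapping->host;
+
+         switch (whence) {
+         case SEEK_DATA:
+                 return iomap_seek_data(inode, offset, &myfs_read_iomap_ops);
+         case SEEK_HOLE:
+                 return iomap_seek_hole(inode, offset, &myfs_read_iomap_ops);
+         }
+         return generic_file_llseek(file, offset, whence);
+ }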
+
+Swap File Activation
+====================
+
+The ``iomap_swapfile_activate`` function finds all the base-page aligned
+regions in a file and sets them up as swap space.
+The file will be ``fsync()``'d before activation.
+``IOMAP_REPORT`` will be passed as the ``flags`` argument to
+``->iomap_begin``.
+All mappings must be mapped or unwritten; they cannot be dirty or
+shared, and cannot span multiple block devices.
+Callers must hold ``i_rwsem`` in exclusive mode; this is already
+provided by ``swapon``.
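+
+A filesystem typically exposes this through the ``->swap_activate``
+address space operation; a sketch assuming a hypothetical
+``myfs_read_iomap_ops``:
+
+.. code-block:: c
+
+ static int myfs_swap_activate(struct swap_info_struct *sis,
+                               struct file *file, sector_t *span)
+ {
+         return iomap_swapfile_activate(sis, file, span,
+                                        &myfs_read_iomap_ops);
+ }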
+
+File Space Mapping Reporting
+============================
+
+iomap implements two of the file space mapping system calls.
+
+FS_IOC_FIEMAP
+-------------
+
+The ``iomap_fiemap`` function exports file extent mappings to userspace
+in the format specified by the ``FS_IOC_FIEMAP`` ioctl.
+``IOMAP_REPORT`` will be passed as the ``flags`` argument to
+``->iomap_begin``.
+Callers commonly hold ``i_rwsem`` in shared mode before calling this
+function.
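+
+Wiring this up is usually a one-line ``->fiemap`` inode operation; a
+sketch assuming a hypothetical ``myfs_read_iomap_ops``:
+
+.. code-block:: c
+
+ static int myfs_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
+                        u64 start, u64 len)
+ {
+         return iomap_fiemap(inode, fi, start, len, &myfs_read_iomap_ops);
+ }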
+
+FIBMAP (deprecated)
+-------------------
+
+``iomap_bmap`` implements FIBMAP.
+The calling conventions are the same as for FIEMAP.
+This function is only provided to maintain compatibility for filesystems
+that implemented FIBMAP prior to conversion.
+This ioctl is deprecated; do **not** add a FIBMAP implementation to
+filesystems that do not have it.
+Callers should probably hold ``i_rwsem`` in shared mode before calling
+this function, but this is unclear.
diff --git a/Documentation/filesystems/iomap/porting.rst b/Documentation/filesystems/iomap/porting.rst
new file mode 100644
index 000000000000..3d49a32c0fff
--- /dev/null
+++ b/Documentation/filesystems/iomap/porting.rst
@@ -0,0 +1,120 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _iomap_porting:
+
+..
+ Dumb style notes to maintain the author's sanity:
+ Please try to start sentences on separate lines so that
+ sentence changes don't bleed colors in diff.
+ Heading decorations are documented in sphinx.rst.
+
+=======================
+Porting Your Filesystem
+=======================
+
+.. contents:: Table of Contents
+ :local:
+
+Why Convert?
+============
+
+There are several reasons to convert a filesystem to iomap:
+
+ 1. The classic Linux I/O path is not terribly efficient.
+ Pagecache operations lock a single base page at a time and then call
+ into the filesystem to return a mapping for only that page.
+ Direct I/O operations build I/O requests a single file block at a
+ time.
+ This worked well enough for direct/indirect-mapped filesystems such
+ as ext2, but is very inefficient for extent-based filesystems such
+ as XFS.
+
+ 2. Large folios are only supported via iomap; there are no plans to
+ convert the old buffer_head path to use them.
+
+ 3. Direct access to storage on memory-like devices (fsdax) is only
+ supported via iomap.
+
+ 4. Lower maintenance overhead for individual filesystem maintainers.
+ iomap handles common pagecache related operations itself, such as
+ allocating, instantiating, locking, and unlocking of folios.
+ No ->write_begin(), ->write_end() or direct_IO
+ address_space_operations are required to be implemented by
+ filesystems using iomap.
+
+How Do I Convert a Filesystem?
+==============================
+
+First, add ``#include <linux/iomap.h>`` to your source code and add
+``select FS_IOMAP`` to your filesystem's Kconfig option.
+Build the kernel, then run fstests with the ``-g all`` option across a
+wide variety of your filesystem's supported configurations to establish
+a baseline of which tests pass and which ones fail.
+
+The recommended approach is first to implement ``->iomap_begin`` (and
+``->iomap_end`` if necessary) to allow iomap to obtain a read-only
+mapping of a file range.
+In most cases, this is a relatively trivial conversion of the existing
+``get_block()`` function for read-only mappings.
+``FS_IOC_FIEMAP`` is a good first target because it is trivial to
+implement support for it and then to determine that the extent map
+iteration is correct from userspace.
+If FIEMAP is returning the correct information, it's a good sign that
+other read-only mapping operations will do the right thing.
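+
+A read-only ``->iomap_begin`` conversion of a classic ``get_block()``
+might look roughly like the sketch below; ``myfs_lookup_extent`` and
+the other ``myfs_`` names are hypothetical:
+
+.. code-block:: c
+
+ static int myfs_read_iomap_begin(struct inode *inode, loff_t offset,
+                 loff_t length, unsigned flags, struct iomap *iomap,
+                 struct iomap *srcmap)
+ {
+         sector_t lblk = offset >> inode->i_blkbits;
+         sector_t pblk;
+         u64 nr_blocks;
+
+         /* Hypothetical extent lookup in the filesystem's metadata. */
+         if (myfs_lookup_extent(inode, lblk, &pblk, &nr_blocks)) {
+                 iomap->type = IOMAP_MAPPED;
+                 iomap->addr = (u64)pblk << inode->i_blkbits;
+         } else {
+                 iomap->type = IOMAP_HOLE;
+                 iomap->addr = IOMAP_NULL_ADDR;
+                 nr_blocks = 1;
+         }
+         iomap->offset = (loff_t)lblk << inode->i_blkbits;
+         iomap->length = nr_blocks << inode->i_blkbits;
+         iomap->bdev = inode->i_sb->s_bdev;
+         return 0;
+ }
+
+ static const struct iomap_ops myfs_read_iomap_ops = {
+         .iomap_begin = myfs_read_iomap_begin,
+ };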
+
+Next, modify the filesystem's ``get_block(create = false)``
+implementation to use the new ``->iomap_begin`` implementation to map
+file space for selected read operations.
+Add a debugging knob that switches the iomap mapping functions on for
+selected call paths.
+It is necessary to write some code to fill out the bufferhead-based
+mapping information from the ``iomap`` structure, but the new functions
+can be tested without needing to implement any iomap APIs.
+
+Once the read-only functions are working like this, convert each high
+level file operation one by one to use iomap native APIs instead of
+going through ``get_block()``.
+If the conversions are done one at a time, regressions should be
+self-evident.
+You *do* have a regression test baseline for fstests, right?
+It is suggested to convert swap file activation, ``SEEK_DATA``, and
+``SEEK_HOLE`` before tackling the I/O paths.
+A likely complexity at this point will be converting the buffered read
+I/O path because of bufferheads.
+The buffered read I/O path doesn't need to be converted yet, though the
+direct I/O read path should be converted in this phase.
+
+At this point, you should look over your ``->iomap_begin`` function.
+If it switches between large blocks of code based on dispatching of the
+``flags`` argument, you should consider breaking it up into
+per-operation iomap ops with smaller, more cohesive functions.
+XFS is a good example of this.
+
+The next thing to do is implement ``get_block(create = true)``
+functionality in the ``->iomap_begin``/``->iomap_end`` methods.
+It is strongly recommended to create separate mapping functions and
+iomap ops for write operations.
+Then convert the direct I/O write path to iomap, and start running fsx
+with direct I/O enabled in earnest on the filesystem.
+This will flush out lots of data integrity corner case bugs that the new
+write mapping implementation introduces.
+
+Now, convert any remaining file operations to call the iomap functions.
+This will get the entire filesystem using the new mapping functions, and
+they should largely be debugged and working correctly after this step.
+
+Most likely at this point, the buffered read and write paths will still
+need to be converted.
+The mapping functions should all work correctly, so all that needs to be
+done is rewriting all the code that interfaces with bufferheads to
+interface with iomap and folios.
+It is much easier first to get regular file I/O (without any fancy
+features like fscrypt, fsverity, compression, or data=journaling)
+converted to use iomap.
+Some of those fancy features (fscrypt and compression) aren't
+implemented yet in iomap.
+For unjournalled filesystems that use the pagecache for symbolic links
+and directories, you might also try converting their handling to iomap.
+
+The rest is left as an exercise for the reader, as it will be different
+for every filesystem.
+If you encounter problems, email the people and lists in
+``get_maintainers.pl`` for help.
diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index 9aaf6ef75eb5..317934c9e8fc 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -645,6 +645,8 @@ The members are as follows:
fs_param_is_blockdev Blockdev path * Needs lookup
fs_param_is_path Path * Needs lookup
fs_param_is_fd File descriptor result->int_32
+ fs_param_is_uid User ID (u32) result->uid
+ fs_param_is_gid Group ID (u32) result->gid
======================= ======================= =====================
Note that if the value is of fs_param_is_bool type, fs_parse() will try
@@ -678,6 +680,8 @@ The members are as follows:
fsparam_bdev() fs_param_is_blockdev
fsparam_path() fs_param_is_path
fsparam_fd() fs_param_is_fd
+ fsparam_uid() fs_param_is_uid
+ fsparam_gid() fs_param_is_gid
======================= ===============================================
all of which take two arguments, name string and option number - for
@@ -784,8 +788,9 @@ process the parameters it is given.
option number (which it returns).
If successful, and if the parameter type indicates the result is a
- boolean, integer or enum type, the value is converted by this function and
- the result stored in result->{boolean,int_32,uint_32,uint_64}.
+ boolean, integer, enum, uid, or gid type, the value is converted by this
+ function and the result stored in
+ result->{boolean,int_32,uint_32,uint_64,uid,gid}.
If a match isn't initially made, the key is prefixed with "no" and no
value is present then an attempt will be made to look up the key with the
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 7c3a565ffbef..82d142de3461 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -571,6 +571,7 @@ encoded manner. The codes are the following:
um userfaultfd missing tracking
uw userfaultfd wr-protect tracking
ss shadow stack page
+ sl sealed
== =======================================
Note that there is no guarantee that every flag and associated mnemonic will
diff --git a/Documentation/i2c/i2c_bus.svg b/Documentation/i2c/i2c_bus.svg
index 3170de976373..45801de4af7d 100644
--- a/Documentation/i2c/i2c_bus.svg
+++ b/Documentation/i2c/i2c_bus.svg
@@ -1,5 +1,6 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<!-- Updated to inclusive terminology by Wolfram Sang -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
@@ -1120,7 +1121,7 @@
<rect
style="opacity:1;fill:#ffb9b9;fill-opacity:1;stroke:#f00000;stroke-width:2.8125;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect4424-3-2-9-7"
- width="112.5"
+ width="134.5"
height="113.75008"
x="112.5"
y="471.11221"
@@ -1133,15 +1134,15 @@
y="521.46259"
id="text4349"><tspan
sodipodi:role="line"
- x="167.5354"
+ x="178.5354"
y="521.46259"
style="font-size:25px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle"
id="tspan1273">I2C</tspan><tspan
sodipodi:role="line"
- x="167.5354"
+ x="178.5354"
y="552.71259"
style="font-size:25px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle"
- id="tspan1285">Master</tspan></text>
+ id="tspan1285">Controller</tspan></text>
<rect
style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#b9ffb9;fill-opacity:1;fill-rule:nonzero;stroke:#006400;stroke-width:2.8125;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
id="rect4424-3-2-9-7-3-3-5-3"
@@ -1171,7 +1172,7 @@
x="318.59131"
y="552.08752"
style="font-size:25.00000191px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:1px"
- id="tspan1287">Slave</tspan></text>
+ id="tspan1287">Target</tspan></text>
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.99968767;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 112.49995,677.36223 c 712.50005,0 712.50005,0 712.50005,0"
@@ -1233,7 +1234,7 @@
x="468.59131"
y="552.08746"
style="font-size:25.00000191px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:1px"
- id="tspan1287-6">Slave</tspan></text>
+ id="tspan1287-6">Target</tspan></text>
<rect
style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#b9ffb9;fill-opacity:1;fill-rule:nonzero;stroke:#006400;stroke-width:2.8125;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
id="rect4424-3-2-9-7-3-3-5-3-1"
@@ -1258,7 +1259,7 @@
x="618.59131"
y="552.08746"
style="font-size:25.00000191px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:1px"
- id="tspan1287-9">Slave</tspan></text>
+ id="tspan1287-9">Target</tspan></text>
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.99968743;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#DotM)"
d="m 150,583.61221 v 93.75"
diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst
index 786c618ba3be..579a1c7df200 100644
--- a/Documentation/i2c/summary.rst
+++ b/Documentation/i2c/summary.rst
@@ -3,29 +3,27 @@ Introduction to I2C and SMBus
=============================
I²C (pronounce: I squared C and written I2C in the kernel documentation) is
-a protocol developed by Philips. It is a slow two-wire protocol (variable
-speed, up to 400 kHz), with a high speed extension (3.4 MHz). It provides
+a protocol developed by Philips. It is a two-wire protocol with variable
+speed (typically up to 400 kHz, high speed modes up to 5 MHz). It provides
an inexpensive bus for connecting many types of devices with infrequent or
-low bandwidth communications needs. I2C is widely used with embedded
-systems. Some systems use variants that don't meet branding requirements,
+low bandwidth communications needs. I2C is widely used with embedded
+systems. Some systems use variants that don't meet branding requirements,
and so are not advertised as being I2C but come under different names,
e.g. TWI (Two Wire Interface), IIC.
-The latest official I2C specification is the `"I2C-bus specification and user
-manual" (UM10204) <https://www.nxp.com/webapp/Download?colCode=UM10204>`_
-published by NXP Semiconductors. However, you need to log-in to the site to
-access the PDF. An older version of the specification (revision 6) is archived
-`here <https://web.archive.org/web/20210813122132/https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_.
+The latest official I2C specification is the `"I²C-bus specification and user
+manual" (UM10204) <https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_
+published by NXP Semiconductors, version 7 as of this writing.
SMBus (System Management Bus) is based on the I2C protocol, and is mostly
-a subset of I2C protocols and signaling. Many I2C devices will work on an
+a subset of I2C protocols and signaling. Many I2C devices will work on an
SMBus, but some SMBus protocols add semantics beyond what is required to
-achieve I2C branding. Modern PC mainboards rely on SMBus. The most common
+achieve I2C branding. Modern PC mainboards rely on SMBus. The most common
devices connected through SMBus are RAM modules configured using I2C EEPROMs,
and hardware monitoring chips.
Because the SMBus is mostly a subset of the generalized I2C bus, we can
-use its protocols on many I2C systems. However, there are systems that don't
+use its protocols on many I2C systems. However, there are systems that don't
meet both SMBus and I2C electrical constraints; and others which can't
implement all the common SMBus protocol semantics or messages.
@@ -33,29 +31,52 @@ implement all the common SMBus protocol semantics or messages.
Terminology
===========
-Using the terminology from the official documentation, the I2C bus connects
-one or more *master* chips and one or more *slave* chips.
+The I2C bus connects one or more controller chips and one or more target chips.
.. kernel-figure:: i2c_bus.svg
- :alt: Simple I2C bus with one master and 3 slaves
+ :alt: Simple I2C bus with one controller and 3 targets
Simple I2C bus
-A **master** chip is a node that starts communications with slaves. In the
-Linux kernel implementation it is called an **adapter** or bus. Adapter
-drivers are in the ``drivers/i2c/busses/`` subdirectory.
+A **controller** chip is a node that starts communications with targets. In the
+Linux kernel implementation it is also called an "adapter" or "bus". Controller
+drivers are usually in the ``drivers/i2c/busses/`` subdirectory.
-An **algorithm** contains general code that can be used to implement a
-whole class of I2C adapters. Each specific adapter driver either depends on
-an algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes
-its own implementation.
+An **algorithm** contains general code that can be used to implement a whole
+class of I2C controllers. Each specific controller driver either depends on an
+algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes its
+own implementation.
-A **slave** chip is a node that responds to communications when addressed
-by the master. In Linux it is called a **client**. Client drivers are kept
-in a directory specific to the feature they provide, for example
-``drivers/media/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for
+A **target** chip is a node that responds to communications when addressed by a
+controller. In the Linux kernel implementation it is also called a "client".
+While targets are usually separate external chips, Linux can also act as a
+target (needs hardware support) and respond to another controller on the bus.
+This is then called a **local target**. In contrast, an external chip is called
+a **remote target**.
+
+Target drivers are kept in a directory specific to the feature they provide,
+for example ``drivers/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for
video-related chips.
-For the example configuration in figure, you will need a driver for your
-I2C adapter, and drivers for your I2C devices (usually one driver for each
-device).
+For the example configuration in the figure above, you will need one driver
+for the I2C controller, and drivers for your I2C targets, usually one driver
+for each target.
+
+Synonyms
+--------
+
+As mentioned above, the Linux I2C implementation historically uses the terms
+"adapter" for controller and "client" for target. A number of data structures
+have these synonyms in their name. So, when discussing implementation details,
+you should be aware of these terms as well. The official wording is preferred,
+though.
+
+Outdated terminology
+--------------------
+
+In earlier I2C specifications, controller was named "master" and target was
+named "slave". These terms have been obsoleted with v7 of the specification and
+their use is also discouraged by the Linux Kernel Code of Conduct. You may
+still find them in references to documentation which has not been updated. The
+general attitude, however, is to use the inclusive terms: controller and
+target. Work to replace the old terminology in the Linux kernel is ongoing.
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index a1f3eb7a43e2..131863142cbb 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -128,7 +128,7 @@ executed to make module versioning work.
modules_install
Install the external module(s). The default location is
- /lib/modules/<kernel_release>/extra/, but a prefix may
+ /lib/modules/<kernel_release>/updates/, but a prefix may
be added with INSTALL_MOD_PATH (discussed in section 5).
clean
@@ -417,7 +417,7 @@ directory:
And external modules are installed in:
- /lib/modules/$(KERNELRELEASE)/extra/
+ /lib/modules/$(KERNELRELEASE)/updates/
5.1 INSTALL_MOD_PATH
--------------------
@@ -438,10 +438,10 @@ And external modules are installed in:
-------------------
External modules are by default installed to a directory under
- /lib/modules/$(KERNELRELEASE)/extra/, but you may wish to
+ /lib/modules/$(KERNELRELEASE)/updates/, but you may wish to
locate modules for a specific functionality in a separate
directory. For this purpose, use INSTALL_MOD_DIR to specify an
- alternative name to "extra."::
+ alternative name to "updates."::
$ make INSTALL_MOD_DIR=gandalf -C $KDIR \
M=$PWD modules_install
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 00dc61358be8..4510e8d1adcb 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -1603,7 +1603,7 @@ operations:
attributes:
- header
reply:
- attributes: &pse
+ attributes:
- header
- podl-pse-admin-state
- podl-pse-admin-control
@@ -1620,7 +1620,10 @@ operations:
do:
request:
- attributes: *pse
+ attributes:
+ - header
+ - podl-pse-admin-control
+ - c33-pse-admin-control
-
name: rss-get
doc: Get RSS params.
diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml
index d21234097167..6bda7a467301 100644
--- a/Documentation/netlink/specs/nfsd.yaml
+++ b/Documentation/netlink/specs/nfsd.yaml
@@ -123,8 +123,6 @@ operations:
doc: dump pending nfsd rpc
attribute-set: rpc-status
dump:
- pre: nfsd-nl-rpc-status-get-start
- post: nfsd-nl-rpc-status-get-done
reply:
attributes:
- xid
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 9232cd7da301..5d0b68f752c0 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -49,7 +49,7 @@ example usage
$ devlink region show [ DEV/REGION ]
$ devlink region del DEV/REGION snapshot SNAPSHOT_ID
$ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ]
- $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length
+ $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH
# Show all of the exposed regions with region sizes:
$ devlink region show
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 5926115ec0ed..8a251d71fa6e 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -32,6 +32,7 @@ Security-related interfaces
seccomp_filter
landlock
lsm
+ mfd_noexec
spec_ctrl
tee
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a141e8e65c5d..9a97030c6c8d 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -186,6 +186,7 @@ Code Seq# Include File Comments
'Q' all linux/soundcard.h
'R' 00-1F linux/random.h conflict!
'R' 01 linux/rfkill.h conflict!
+'R' 20-2F linux/trace_mmap.h
'R' C0-DF net/bluetooth/rfcomm.h
'R' E0 uapi/linux/fsl_mc.h
'S' all linux/cdrom.h conflict!
diff --git a/Documentation/userspace-api/mfd_noexec.rst b/Documentation/userspace-api/mfd_noexec.rst
new file mode 100644
index 000000000000..7afcc480e38f
--- /dev/null
+++ b/Documentation/userspace-api/mfd_noexec.rst
@@ -0,0 +1,86 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+Introduction of non-executable mfd
+==================================
+:Author:
+ Daniel Verkamp <[email protected]>
+ Jeff Xu <[email protected]>
+
+:Contributor:
+ Aleksa Sarai <[email protected]>
+
+Since Linux introduced the memfd feature, memfds have always had their
+execute bit set, and the memfd_create() syscall doesn't allow setting
+it differently.
+
+However, in a secure-by-default system such as ChromeOS (where all
+executables should come from the rootfs, which is protected by verified
+boot), the executable nature of memfd opens a door for NoExec bypass
+and enables a "confused deputy" attack. E.g., in VRP bug [1], the
+cros_vm process created a memfd to share content with an external
+process; however, the memfd was overwritten and used for executing
+arbitrary code and root escalation. [2] lists more VRPs of this kind.
+
+On the other hand, executable memfd has its legitimate uses: runc uses
+memfd's seal and executable feature to copy the contents of a binary and
+then execute it. For such a system, we need a solution to differentiate
+runc's use of executable memfds from an attacker's [3].
+
+To address the above:
+ - Let memfd_create() set X bit at creation time.
+ - Let memfd be sealed for modifying X bit when NX is set.
+ - Add a new pid namespace sysctl: vm.memfd_noexec to help applications in
+ migrating and enforcing non-executable MFD.
+
+User API
+========
+``int memfd_create(const char *name, unsigned int flags)``
+
+``MFD_NOEXEC_SEAL``
+ When MFD_NOEXEC_SEAL bit is set in the ``flags``, memfd is created
+ with NX. F_SEAL_EXEC is set and the memfd can't be modified to
+ add X later. MFD_ALLOW_SEALING is also implied.
+ This is the most common case for the application to use memfd.
+
+``MFD_EXEC``
+ When MFD_EXEC bit is set in the ``flags``, memfd is created with X.
+
+Note:
+ ``MFD_NOEXEC_SEAL`` implies ``MFD_ALLOW_SEALING``. If an app does
+ not want sealing, it can add F_SEAL_SEAL after creation.
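+
+A minimal userspace sketch of creating a sealed non-executable memfd;
+on older glibc the ``MFD_*`` flag definitions may need to come from
+``<linux/memfd.h>``:
+
+.. code-block:: c
+
+    #define _GNU_SOURCE
+    #include <sys/mman.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        int fd = memfd_create("demo", MFD_NOEXEC_SEAL);
+
+        if (fd < 0)
+            return 1;
+        /* NX is set and F_SEAL_EXEC is applied, so the X bit can no
+         * longer be added to this memfd. */
+        if (write(fd, "data", 4) != 4) {
+            close(fd);
+            return 1;
+        }
+        close(fd);
+        return 0;
+    }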
+
+
+Sysctl
+======
+``pid namespaced sysctl vm.memfd_noexec``
+
+The new pid namespaced sysctl vm.memfd_noexec has 3 values:
+
+ - 0: MEMFD_NOEXEC_SCOPE_EXEC
+ memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like
+ MFD_EXEC was set.
+
+ - 1: MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL
+ memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like
+ MFD_NOEXEC_SEAL was set.
+
+ - 2: MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED
+ memfd_create() without MFD_NOEXEC_SEAL will be rejected.
+
+The sysctl allows finer control of memfd_create for old software that
+doesn't set the executable bit; for example, a container with
+vm.memfd_noexec=1 means the old software will create non-executable memfd
+by default while new software can create executable memfd by setting
+MFD_EXEC.
+
+The value of vm.memfd_noexec is passed to the child namespace at creation
+time. In addition, the setting is hierarchical, i.e. during memfd_create,
+the kernel will search from the current ns to the root ns and use the most
+restrictive setting.
+
+[1] https://crbug.com/1305267
+
+[2] https://bugs.chromium.org/p/chromium/issues/list?q=type%3Dbug-security%20memfd%20escalation&can=1
+
+[3] https://lwn.net/Articles/781013/
diff --git a/Documentation/virt/hyperv/clocks.rst b/Documentation/virt/hyperv/clocks.rst
index a56f4837d443..176043265803 100644
--- a/Documentation/virt/hyperv/clocks.rst
+++ b/Documentation/virt/hyperv/clocks.rst
@@ -62,12 +62,21 @@ shared page with scale and offset values into user space. User
space code performs the same algorithm of reading the TSC and
applying the scale and offset to get the constant 10 MHz clock.
-Linux clockevents are based on Hyper-V synthetic timer 0. While
-Hyper-V offers 4 synthetic timers for each CPU, Linux only uses
-timer 0. Interrupts from stimer0 are recorded on the "HVS" line in
-/proc/interrupts. Clockevents based on the virtualized PIT and
-local APIC timer also work, but the Hyper-V synthetic timer is
-preferred.
+Linux clockevents are based on Hyper-V synthetic timer 0 (stimer0).
+While Hyper-V offers 4 synthetic timers for each CPU, Linux only uses
+timer 0. In older versions of Hyper-V, an interrupt from stimer0
+results in a VMBus control message that is demultiplexed by
+vmbus_isr() as described in the Documentation/virt/hyperv/vmbus.rst
+documentation. In newer versions of Hyper-V, stimer0 interrupts can
+be mapped to an architectural interrupt, which is referred to as
+"Direct Mode". Linux prefers to use Direct Mode when available. Since
+x86/x64 doesn't support per-CPU interrupts, Direct Mode statically
+allocates an x86 interrupt vector (HYPERV_STIMER0_VECTOR) across all CPUs
+and explicitly codes it to call the stimer0 interrupt handler. Hence
+interrupts from stimer0 are recorded on the "HVS" line in /proc/interrupts
+rather than being associated with a Linux IRQ. Clockevents based on the
+virtualized PIT and local APIC timer also work, but Hyper-V stimer0
+is preferred.
The driver for the Hyper-V synthetic system clock and timers is
drivers/clocksource/hyperv_timer.c.
diff --git a/Documentation/virt/hyperv/overview.rst b/Documentation/virt/hyperv/overview.rst
index cd493332c88a..77408a89d1a4 100644
--- a/Documentation/virt/hyperv/overview.rst
+++ b/Documentation/virt/hyperv/overview.rst
@@ -40,7 +40,7 @@ Linux guests communicate with Hyper-V in four different ways:
arm64, these synthetic registers must be accessed using explicit
hypercalls.
-* VMbus: VMbus is a higher-level software construct that is built on
+* VMBus: VMBus is a higher-level software construct that is built on
the other 3 mechanisms. It is a message passing interface between
the Hyper-V host and the Linux guest. It uses memory that is shared
between Hyper-V and the guest, along with various signaling
@@ -54,8 +54,8 @@ x86/x64 architecture only.
.. _Hyper-V Top Level Functional Spec (TLFS): https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/tlfs
-VMbus is not documented. This documentation provides a high-level
-overview of VMbus and how it works, but the details can be discerned
+VMBus is not documented. This documentation provides a high-level
+overview of VMBus and how it works, but the details can be discerned
only from the code.
Sharing Memory
@@ -74,7 +74,7 @@ follows:
physical address space. How Hyper-V is told about the GPA or list
of GPAs varies. In some cases, a single GPA is written to a
synthetic register. In other cases, a GPA or list of GPAs is sent
- in a VMbus message.
+ in a VMBus message.
* Hyper-V translates the GPAs into "real" physical memory addresses,
and creates a virtual mapping that it can use to access the memory.
@@ -133,9 +133,9 @@ only the CPUs actually present in the VM, so Linux does not report
any hot-add CPUs.
A Linux guest CPU may be taken offline using the normal Linux
-mechanisms, provided no VMbus channel interrupts are assigned to
-the CPU. See the section on VMbus Interrupts for more details
-on how VMbus channel interrupts can be re-assigned to permit
+mechanisms, provided no VMBus channel interrupts are assigned to
+the CPU. See the section on VMBus Interrupts for more details
+on how VMBus channel interrupts can be re-assigned to permit
taking a CPU offline.
32-bit and 64-bit
@@ -169,14 +169,14 @@ and functionality. Hyper-V indicates feature/function availability
via flags in synthetic MSRs that Hyper-V provides to the guest,
and the guest code tests these flags.
-VMbus has its own protocol version that is negotiated during the
-initial VMbus connection from the guest to Hyper-V. This version
+VMBus has its own protocol version that is negotiated during the
+initial VMBus connection from the guest to Hyper-V. This version
number is also output to dmesg during boot. This version number
is checked in a few places in the code to determine if specific
functionality is present.
-Furthermore, each synthetic device on VMbus also has a protocol
-version that is separate from the VMbus protocol version. Device
+Furthermore, each synthetic device on VMBus also has a protocol
+version that is separate from the VMBus protocol version. Device
drivers for these synthetic devices typically negotiate the device
protocol version, and may test that protocol version to determine
if specific device functionality is present.
diff --git a/Documentation/virt/hyperv/vmbus.rst b/Documentation/virt/hyperv/vmbus.rst
index d2012d9022c5..1dcef6a7fda3 100644
--- a/Documentation/virt/hyperv/vmbus.rst
+++ b/Documentation/virt/hyperv/vmbus.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-VMbus
+VMBus
=====
-VMbus is a software construct provided by Hyper-V to guest VMs. It
+VMBus is a software construct provided by Hyper-V to guest VMs. It
consists of a control path and common facilities used by synthetic
devices that Hyper-V presents to guest VMs. The control path is
used to offer synthetic devices to the guest VM and, in some cases,
@@ -12,9 +12,9 @@ and the synthetic device implementation that is part of Hyper-V, and
signaling primitives to allow Hyper-V and the guest to interrupt
each other.
-VMbus is modeled in Linux as a bus, with the expected /sys/bus/vmbus
-entry in a running Linux guest. The VMbus driver (drivers/hv/vmbus_drv.c)
-establishes the VMbus control path with the Hyper-V host, then
+VMBus is modeled in Linux as a bus, with the expected /sys/bus/vmbus
+entry in a running Linux guest. The VMBus driver (drivers/hv/vmbus_drv.c)
+establishes the VMBus control path with the Hyper-V host, then
registers itself as a Linux bus driver. It implements the standard
bus functions for adding and removing devices to/from the bus.
@@ -49,9 +49,9 @@ synthetic NIC is referred to as "netvsc" and the Linux driver for
the synthetic SCSI controller is "storvsc". These drivers contain
functions with names like "storvsc_connect_to_vsp".
-VMbus channels
+VMBus channels
--------------
-An instance of a synthetic device uses VMbus channels to communicate
+An instance of a synthetic device uses VMBus channels to communicate
between the VSP and the VSC. Channels are bi-directional and used
for passing messages. Most synthetic devices use a single channel,
but the synthetic SCSI controller and synthetic NIC may use multiple
@@ -73,7 +73,7 @@ write indices and some control flags, followed by the memory for the
actual ring. The size of the ring is determined by the VSC in the
guest and is specific to each synthetic device. The list of GPAs
making up the ring is communicated to the Hyper-V host over the
-VMbus control path as a GPA Descriptor List (GPADL). See function
+VMBus control path as a GPA Descriptor List (GPADL). See function
vmbus_establish_gpadl().
Each ring buffer is mapped into contiguous Linux kernel virtual
@@ -102,10 +102,10 @@ resources. For Windows Server 2019 and later, this limit is
approximately 1280 Mbytes. For versions prior to Windows Server
2019, the limit is approximately 384 Mbytes.
-VMbus messages
---------------
-All VMbus messages have a standard header that includes the message
-length, the offset of the message payload, some flags, and a
+VMBus channel messages
+----------------------
+All messages sent in a VMBus channel have a standard header that includes
+the message length, the offset of the message payload, some flags, and a
transactionID. The portion of the message after the header is
unique to each VSP/VSC pair.
@@ -137,7 +137,7 @@ control message contains a list of GPAs that describe the data
buffer. For example, the storvsc driver uses this approach to
specify the data buffers to/from which disk I/O is done.
-Three functions exist to send VMbus messages:
+Three functions exist to send VMBus channel messages:
1. vmbus_sendpacket(): Control-only messages and messages with
embedded data -- no GPAs
@@ -154,20 +154,51 @@ Historically, Linux guests have trusted Hyper-V to send well-formed
and valid messages, and Linux drivers for synthetic devices did not
fully validate messages. With the introduction of processor
technologies that fully encrypt guest memory and that allow the
-guest to not trust the hypervisor (AMD SNP-SEV, Intel TDX), trusting
+guest to not trust the hypervisor (AMD SEV-SNP, Intel TDX), trusting
the Hyper-V host is no longer a valid assumption. The drivers for
-VMbus synthetic devices are being updated to fully validate any
+VMBus synthetic devices are being updated to fully validate any
values read from memory that is shared with Hyper-V, which includes
-messages from VMbus devices. To facilitate such validation,
+messages from VMBus devices. To facilitate such validation,
messages read by the guest from the "in" ring buffer are copied to a
temporary buffer that is not shared with Hyper-V. Validation is
performed in this temporary buffer without the risk of Hyper-V
maliciously modifying the message after it is validated but before
it is used.
-VMbus interrupts
+Synthetic Interrupt Controller (synic)
+--------------------------------------
+Hyper-V provides each guest CPU with a synthetic interrupt controller
+that is used by VMBus for host-guest communication. While each synic
+defines 16 synthetic interrupts (SINT), Linux uses only one of the 16
+(VMBUS_MESSAGE_SINT). All interrupts related to communication between
+the Hyper-V host and a guest CPU use that SINT.
+
+The SINT is mapped to a single per-CPU architectural interrupt (i.e.,
+an 8-bit x86/x64 interrupt vector, or an arm64 PPI INTID). Because
+each CPU in the guest has a synic and may receive VMBus interrupts,
+they are best modeled in Linux as per-CPU interrupts. This model works
+well on arm64 where a single per-CPU Linux IRQ is allocated for
+VMBUS_MESSAGE_SINT. This IRQ appears in /proc/interrupts as an IRQ labelled
+"Hyper-V VMbus". Since x86/x64 lacks support for per-CPU IRQs, an x86
+interrupt vector is statically allocated (HYPERVISOR_CALLBACK_VECTOR)
+across all CPUs and explicitly coded to call vmbus_isr(). In this case,
+there's no Linux IRQ, and the interrupts are visible in aggregate in
+/proc/interrupts on the "HYP" line.
+
+The synic provides the means to demultiplex the architectural interrupt into
+one or more logical interrupts and route the logical interrupt to the proper
+VMBus handler in Linux. This demultiplexing is done by vmbus_isr() and
+related functions that access synic data structures.
+
+The synic is not modeled in Linux as an irq chip or irq domain,
+and the demultiplexed logical interrupts are not Linux IRQs. As such,
+they don't appear in /proc/interrupts or /proc/irq. The CPU
+affinity for one of these logical interrupts is controlled via an
+entry under /sys/bus/vmbus as described below.
+
+VMBus interrupts
----------------
-VMbus provides a mechanism for the guest to interrupt the host when
+VMBus provides a mechanism for the guest to interrupt the host when
the guest has queued new messages in a ring buffer. The host
expects that the guest will send an interrupt only when an "out"
ring buffer transitions from empty to non-empty. If the guest sends
@@ -176,63 +207,55 @@ unnecessary. If a guest sends an excessive number of unnecessary
interrupts, the host may throttle that guest by suspending its
execution for a few seconds to prevent a denial-of-service attack.
-Similarly, the host will interrupt the guest when it sends a new
-message on the VMbus control path, or when a VMbus channel "in" ring
-buffer transitions from empty to non-empty. Each CPU in the guest
-may receive VMbus interrupts, so they are best modeled as per-CPU
-interrupts in Linux. This model works well on arm64 where a single
-per-CPU IRQ is allocated for VMbus. Since x86/x64 lacks support for
-per-CPU IRQs, an x86 interrupt vector is statically allocated (see
-HYPERVISOR_CALLBACK_VECTOR) across all CPUs and explicitly coded to
-call the VMbus interrupt service routine. These interrupts are
-visible in /proc/interrupts on the "HYP" line.
-
-The guest CPU that a VMbus channel will interrupt is selected by the
+Similarly, the host will interrupt the guest via the synic when
+it sends a new message on the VMBus control path, or when a VMBus
+channel "in" ring buffer transitions from empty to non-empty due to
+the host inserting a new VMBus channel message. The control message stream
+and each VMBus channel "in" ring buffer are separate logical interrupts
+that are demultiplexed by vmbus_isr(). It demultiplexes by first checking
+for channel interrupts by calling vmbus_chan_sched(), which looks at a synic
+bitmap to determine which channels have pending interrupts on this CPU.
+If multiple channels have pending interrupts for this CPU, they are
+processed sequentially. When all channel interrupts have been processed,
+vmbus_isr() checks for and processes any messages received on the VMBus
+control path.
+
+The guest CPU that a VMBus channel will interrupt is selected by the
guest when the channel is created, and the host is informed of that
-selection. VMbus devices are broadly grouped into two categories:
+selection. VMBus devices are broadly grouped into two categories:
-1. "Slow" devices that need only one VMbus channel. The devices
+1. "Slow" devices that need only one VMBus channel. The devices
(such as keyboard, mouse, heartbeat, and timesync) generate
- relatively few interrupts. Their VMbus channels are all
+ relatively few interrupts. Their VMBus channels are all
assigned to interrupt the VMBUS_CONNECT_CPU, which is always
CPU 0.
-2. "High speed" devices that may use multiple VMbus channels for
+2. "High speed" devices that may use multiple VMBus channels for
higher parallelism and performance. These devices include the
- synthetic SCSI controller and synthetic NIC. Their VMbus
+ synthetic SCSI controller and synthetic NIC. Their VMBus
channels interrupts are assigned to CPUs that are spread out
among the available CPUs in the VM so that interrupts on
multiple channels can be processed in parallel.
-The assignment of VMbus channel interrupts to CPUs is done in the
+The assignment of VMBus channel interrupts to CPUs is done in the
function init_vp_index(). This assignment is done outside of the
normal Linux interrupt affinity mechanism, so the interrupts are
neither "unmanaged" nor "managed" interrupts.
-The CPU that a VMbus channel will interrupt can be seen in
+The CPU that a VMBus channel will interrupt can be seen in
/sys/bus/vmbus/devices/<deviceGUID>/ channels/<channelRelID>/cpu.
When running on later versions of Hyper-V, the CPU can be changed
-by writing a new value to this sysfs entry. Because the interrupt
-assignment is done outside of the normal Linux affinity mechanism,
-there are no entries in /proc/irq corresponding to individual
-VMbus channel interrupts.
+by writing a new value to this sysfs entry. Because VMBus channel
+interrupts are not Linux IRQs, there are no entries in /proc/interrupts
+or /proc/irq corresponding to individual VMBus channel interrupts.
An online CPU in a Linux guest may not be taken offline if it has
-VMbus channel interrupts assigned to it. Any such channel
+VMBus channel interrupts assigned to it. Any such channel
interrupts must first be manually reassigned to another CPU as
described above. When no channel interrupts are assigned to the
CPU, it can be taken offline.
-When a guest CPU receives a VMbus interrupt from the host, the
-function vmbus_isr() handles the interrupt. It first checks for
-channel interrupts by calling vmbus_chan_sched(), which looks at a
-bitmap setup by the host to determine which channels have pending
-interrupts on this CPU. If multiple channels have pending
-interrupts for this CPU, they are processed sequentially. When all
-channel interrupts have been processed, vmbus_isr() checks for and
-processes any message received on the VMbus control path.
-
-The VMbus channel interrupt handling code is designed to work
+The VMBus channel interrupt handling code is designed to work
correctly even if an interrupt is received on a CPU other than the
CPU assigned to the channel. Specifically, the code does not use
CPU-based exclusion for correctness. In normal operation, Hyper-V
@@ -242,23 +265,23 @@ when Hyper-V will make the transition. The code must work correctly
even if there is a time lag before Hyper-V starts interrupting the
new CPU. See comments in target_cpu_store().
-VMbus device creation/deletion
+VMBus device creation/deletion
------------------------------
Hyper-V and the Linux guest have a separate message-passing path
that is used for synthetic device creation and deletion. This
-path does not use a VMbus channel. See vmbus_post_msg() and
+path does not use a VMBus channel. See vmbus_post_msg() and
vmbus_on_msg_dpc().
The first step is for the guest to connect to the generic
-Hyper-V VMbus mechanism. As part of establishing this connection,
-the guest and Hyper-V agree on a VMbus protocol version they will
+Hyper-V VMBus mechanism. As part of establishing this connection,
+the guest and Hyper-V agree on a VMBus protocol version they will
use. This negotiation allows newer Linux kernels to run on older
Hyper-V versions, and vice versa.
The guest then tells Hyper-V to "send offers". Hyper-V sends an
offer message to the guest for each synthetic device that the VM
-is configured to have. Each VMbus device type has a fixed GUID
-known as the "class ID", and each VMbus device instance is also
+is configured to have. Each VMBus device type has a fixed GUID
+known as the "class ID", and each VMBus device instance is also
identified by a GUID. The offer message from Hyper-V contains
both GUIDs to uniquely (within the VM) identify the device.
There is one offer message for each device instance, so a VM with
@@ -275,7 +298,7 @@ type based on the class ID, and invokes the correct driver to set up
the device. Driver/device matching is performed using the standard
Linux mechanism.
-The device driver probe function opens the primary VMbus channel to
+The device driver probe function opens the primary VMBus channel to
the corresponding VSP. It allocates guest memory for the channel
ring buffers and shares the ring buffer with the Hyper-V host by
giving the host a list of GPAs for the ring buffer memory. See
@@ -285,7 +308,7 @@ Once the ring buffer is set up, the device driver and VSP exchange
setup messages via the primary channel. These messages may include
negotiating the device protocol version to be used between the Linux
VSC and the VSP on the Hyper-V host. The setup messages may also
-include creating additional VMbus channels, which are somewhat
+include creating additional VMBus channels, which are somewhat
mis-named as "sub-channels" since they are functionally
equivalent to the primary channel once they are created.
diff --git a/MAINTAINERS b/MAINTAINERS
index cf9c9221c388..7507671fd4e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -846,12 +846,6 @@ ALPS PS/2 TOUCHPAD DRIVER
R: Pali Rohár <[email protected]>
F: drivers/input/mouse/alps.*
-ALTERA I2C CONTROLLER DRIVER
-M: Thor Thayer <[email protected]>
-S: Maintained
-F: Documentation/devicetree/bindings/i2c/i2c-altera.txt
-F: drivers/i2c/busses/i2c-altera.c
-
ALTERA MAILBOX DRIVER
M: Mun Yew Tham <[email protected]>
S: Maintained
@@ -871,21 +865,6 @@ L: [email protected]
S: Maintained
F: drivers/gpio/gpio-altera.c
-ALTERA SYSTEM MANAGER DRIVER
-M: Thor Thayer <[email protected]>
-S: Maintained
-F: drivers/mfd/altera-sysmgr.c
-F: include/linux/mfd/altera-sysmgr.h
-
-ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT
-M: Thor Thayer <[email protected]>
-S: Maintained
-F: drivers/gpio/gpio-altera-a10sr.c
-F: drivers/mfd/altera-a10sr.c
-F: drivers/reset/reset-a10sr.c
-F: include/dt-bindings/reset/altr,rst-mgr-a10sr.h
-F: include/linux/mfd/altera-a10sr.h
-
ALTERA TRIPLE SPEED ETHERNET DRIVER
M: Joyce Ooi <[email protected]>
@@ -1044,7 +1023,7 @@ M: Joerg Roedel <[email protected]>
R: Suravee Suthikulpanit <[email protected]>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/amd/
F: include/linux/amd-iommu.h
@@ -2892,7 +2871,7 @@ F: drivers/edac/altera_edac.[ch]
ARM/SPREADTRUM SoC SUPPORT
M: Orson Zhai <[email protected]>
M: Baolin Wang <[email protected]>
-M: Chunyan Zhang <[email protected]>
+R: Chunyan Zhang <[email protected]>
S: Maintained
F: arch/arm64/boot/dts/sprd
N: sprd
@@ -3601,10 +3580,9 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/b43
F: drivers/net/wireless/broadcom/b43/
B43LEGACY WIRELESS DRIVER
-M: Larry Finger <[email protected]>
-S: Maintained
+S: Orphan
W: https://wireless.wiki.kernel.org/en/users/Drivers/b43
F: drivers/net/wireless/broadcom/b43legacy/
@@ -3781,6 +3759,20 @@ F: include/linux/blk*
F: kernel/trace/blktrace.c
F: lib/sbitmap.c
+BLOCK LAYER DEVICE DRIVER API [RUST]
+M: Andreas Hindborg <[email protected]>
+R: Boqun Feng <[email protected]>
+S: Supported
+W: https://rust-for-linux.com
+B: https://github.com/Rust-for-Linux/linux/issues
+C: https://rust-for-linux.zulipchat.com/#narrow/stream/Block
+T: git https://github.com/Rust-for-Linux/linux.git rust-block-next
+F: drivers/block/rnull.rs
+F: rust/kernel/block.rs
+F: rust/kernel/block/
+
BLOCK2MTD DRIVER
M: Joern Engel <[email protected]>
@@ -3980,7 +3972,7 @@ R: Song Liu <[email protected]>
R: Yonghong Song <[email protected]>
R: John Fastabend <[email protected]>
R: KP Singh <[email protected]>
-R: Stanislav Fomichev <[email protected]>
+R: Stanislav Fomichev <[email protected]>
R: Hao Luo <[email protected]>
R: Jiri Olsa <[email protected]>
@@ -4083,12 +4075,13 @@ F: kernel/bpf/ringbuf.c
BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <[email protected]>
-R: Matt Bobrowski <[email protected]>
+M: Matt Bobrowski <[email protected]>
S: Maintained
F: Documentation/bpf/prog_lsm.rst
F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c
+F: kernel/trace/bpf_trace.c
F: security/bpf/
BPF [SELFTESTS] (Test Runners & Infrastructure)
@@ -5295,7 +5288,7 @@ F: drivers/infiniband/hw/usnic/
CLANG CONTROL FLOW INTEGRITY SUPPORT
M: Sami Tolvanen <[email protected]>
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
R: Nathan Chancellor <[email protected]>
S: Supported
@@ -6238,9 +6231,8 @@ S: Maintained
F: drivers/usb/dwc3/
DESIGNWARE XDATA IP DRIVER
-M: Gustavo Pimentel <[email protected]>
-S: Maintained
+S: Orphan
F: Documentation/misc-devices/dw-xdata-pcie.rst
F: drivers/misc/dw-xdata-pcie.c
@@ -8211,7 +8203,7 @@ F: rust/kernel/net/phy.rs
EXEC & BINFMT API, ELF
R: Eric Biederman <[email protected]>
-R: Kees Cook <[email protected]>
+R: Kees Cook <[email protected]>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
@@ -8482,6 +8474,7 @@ R: Darrick J. Wong <[email protected]>
S: Supported
+F: Documentation/filesystems/iomap/*
F: fs/iomap/
F: include/linux/iomap.h
@@ -8612,7 +8605,7 @@ S: Maintained
F: drivers/net/ethernet/nvidia/*
FORTIFY_SOURCE
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
@@ -8833,6 +8826,7 @@ F: drivers/spi/spi-fsl-qspi.c
FREESCALE QUICC ENGINE LIBRARY
M: Qiang Zhao <[email protected]>
+M: Christophe Leroy <[email protected]>
S: Maintained
F: drivers/soc/fsl/qe/
@@ -8882,9 +8876,10 @@ S: Maintained
F: drivers/tty/serial/ucc_uart.c
FREESCALE SOC DRIVERS
+M: Christophe Leroy <[email protected]>
L: [email protected] (moderated for non-subscribers)
-S: Orphan
+S: Maintained
F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
@@ -9102,7 +9097,7 @@ F: include/linux/mfd/gsc.h
F: include/linux/platform_data/gsc_hwmon.h
GCC PLUGINS
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
@@ -9236,7 +9231,7 @@ S: Maintained
F: drivers/input/touchscreen/resistive-adc-touch.c
GENERIC STRING LIBRARY
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
R: Andy Shevchenko <[email protected]>
S: Supported
@@ -11156,7 +11151,7 @@ M: David Woodhouse <[email protected]>
M: Lu Baolu <[email protected]>
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/intel/
INTEL IPU3 CSI-2 CIO2 DRIVER
@@ -11529,7 +11524,7 @@ IOMMU DMA-API LAYER
M: Robin Murphy <[email protected]>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/dma-iommu.c
F: drivers/iommu/dma-iommu.h
F: drivers/iommu/iova.c
@@ -11541,7 +11536,7 @@ M: Will Deacon <[email protected]>
R: Robin Murphy <[email protected]>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: Documentation/devicetree/bindings/iommu/
F: Documentation/userspace-api/iommu.rst
F: drivers/iommu/
@@ -11570,7 +11565,7 @@ F: include/linux/iosys-map.h
IO_URING
M: Jens Axboe <[email protected]>
-R: Pavel Begunkov <[email protected]>
+M: Pavel Begunkov <[email protected]>
S: Maintained
T: git git://git.kernel.dk/linux-block
@@ -11950,7 +11945,7 @@ F: scripts/package/
F: usr/
KERNEL HARDENING (not covered by other areas)
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
R: Gustavo A. R. Silva <[email protected]>
S: Supported
@@ -12382,7 +12377,6 @@ F: drivers/video/backlight/ktz8866.c
KVM PARAVIRT (KVM/paravirt)
M: Paolo Bonzini <[email protected]>
-R: Wanpeng Li <[email protected]>
R: Vitaly Kuznetsov <[email protected]>
S: Supported
@@ -12478,7 +12472,7 @@ F: drivers/scsi/53c700*
LEAKING_ADDRESSES
M: Tycho Andersen <[email protected]>
-R: Kees Cook <[email protected]>
+R: Kees Cook <[email protected]>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
@@ -12774,7 +12768,7 @@ F: arch/powerpc/platforms/8xx/
F: arch/powerpc/platforms/83xx/
LINUX KERNEL DUMP TEST MODULE (LKDTM)
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
S: Maintained
F: drivers/misc/lkdtm/*
F: tools/testing/selftests/lkdtm/*
@@ -12904,7 +12898,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/
F: drivers/media/usb/dvb-usb-v2/lmedm04*
LOADPIN SECURITY MODULE
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
F: Documentation/admin-guide/LSM/LoadPin.rst
@@ -14474,7 +14468,7 @@ MEMORY MAPPING
M: Andrew Morton <[email protected]>
R: Liam R. Howlett <[email protected]>
R: Vlastimil Babka <[email protected]>
-R: Lorenzo Stoakes <[email protected]>
+R: Lorenzo Stoakes <[email protected]>
S: Maintained
W: http://www.linux-mm.org
@@ -16447,7 +16441,7 @@ F: arch/arm/boot/dts/ti/omap/am335x-nano.dts
OMAP1 SUPPORT
M: Aaro Koskinen <[email protected]>
M: Janusz Krzysztofik <[email protected]>
-M: Tony Lindgren <[email protected]>
+R: Tony Lindgren <[email protected]>
S: Maintained
Q: http://patchwork.kernel.org/project/linux-omap/list/
@@ -16459,10 +16453,13 @@ F: include/linux/platform_data/ams-delta-fiq.h
F: include/linux/platform_data/i2c-omap.h
OMAP2+ SUPPORT
+M: Aaro Koskinen <[email protected]>
+M: Andreas Kemnade <[email protected]>
+M: Kevin Hilman <[email protected]>
+M: Roger Quadros <[email protected]>
M: Tony Lindgren <[email protected]>
S: Maintained
-W: http://www.muru.com/linux/omap/
W: http://linux.omap.com/
Q: http://patchwork.kernel.org/project/linux-omap/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
@@ -17532,7 +17529,6 @@ F: include/linux/peci.h
PENSANDO ETHERNET DRIVERS
M: Shannon Nelson <[email protected]>
M: Brett Creeley <[email protected]>
S: Supported
F: Documentation/networking/device_drivers/ethernet/pensando/ionic.rst
@@ -17996,7 +17992,7 @@ F: tools/testing/selftests/proc/
PROC SYSCTL
M: Luis Chamberlain <[email protected]>
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
M: Joel Granados <[email protected]>
@@ -18052,7 +18048,7 @@ F: Documentation/devicetree/bindings/net/pse-pd/
F: drivers/net/pse-pd/
PSTORE FILESYSTEM
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
R: Tony Luck <[email protected]>
R: Guilherme G. Piccoli <[email protected]>
@@ -18210,6 +18206,7 @@ QCOM AUDIO (ASoC) DRIVERS
M: Srinivas Kandagatla <[email protected]>
M: Banajit Goswami <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/soc/qcom/qcom,apr*
F: Documentation/devicetree/bindings/sound/qcom,*
@@ -18374,7 +18371,7 @@ M: Jeff Johnson <[email protected]>
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git
F: drivers/net/wireless/ath/ath12k/
N: ath12k
@@ -18384,7 +18381,7 @@ M: Jeff Johnson <[email protected]>
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git
F: drivers/net/wireless/ath/ath10k/
N: ath10k
@@ -18395,7 +18392,7 @@ L: [email protected]
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k
B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git
F: drivers/net/wireless/ath/ath11k/
N: ath11k
@@ -18404,7 +18401,7 @@ M: Toke Høiland-Jørgensen <[email protected]>
S: Maintained
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git
F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
F: drivers/net/wireless/ath/ath9k/
@@ -19315,7 +19312,7 @@ F: drivers/perf/riscv_pmu_legacy.c
F: drivers/perf/riscv_pmu_sbi.c
RISC-V THEAD SoC SUPPORT
-M: Jisheng Zhang <[email protected]>
+M: Drew Fustini <[email protected]>
M: Guo Ren <[email protected]>
M: Fu Wei <[email protected]>
@@ -19509,7 +19506,6 @@ F: drivers/net/wireless/realtek/rtl818x/rtl8180/
RTL8187 WIRELESS DRIVER
M: Hin-Tak Leung <[email protected]>
-M: Larry Finger <[email protected]>
S: Maintained
T: git https://github.com/pkshih/rtw.git
@@ -20058,7 +20054,7 @@ F: drivers/media/cec/platform/seco/seco-cec.c
F: drivers/media/cec/platform/seco/seco-cec.h
SECURE COMPUTING
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
R: Andy Lutomirski <[email protected]>
R: Will Drewry <[email protected]>
S: Supported
@@ -21247,7 +21243,6 @@ W: http://wiki.laptop.org/go/DCON
F: drivers/staging/olpc_dcon/
STAGING - REALTEK RTL8712U DRIVERS
-M: Larry Finger <[email protected]>
M:	Florian Schilhabel <[email protected]>
S: Odd Fixes
F: drivers/staging/rtl8712/
@@ -22746,7 +22741,7 @@ M: Jarkko Sakkinen <[email protected]>
R: Jason Gunthorpe <[email protected]>
S: Maintained
-W: https://gitlab.com/jarkkojs/linux-tpmdd-test
+W: https://codeberg.org/jarkko/linux-tpmdd-test
Q: https://patchwork.kernel.org/project/linux-integrity/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git
F: Documentation/devicetree/bindings/tpm/
@@ -22972,7 +22967,7 @@ F: drivers/block/ublk_drv.c
F: include/uapi/linux/ublk_cmd.h
UBSAN
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
R: Marco Elver <[email protected]>
R: Andrey Konovalov <[email protected]>
R: Andrey Ryabinin <[email protected]>
@@ -23863,8 +23858,8 @@ S: Maintained
F: drivers/vhost/scsi.c
VIRTIO I2C DRIVER
-M: Conghui Chen <[email protected]>
M: Viresh Kumar <[email protected]>
+R: "Chen, Jian Jun" <[email protected]>
S: Maintained
@@ -23974,7 +23969,6 @@ VMALLOC
M: Andrew Morton <[email protected]>
R: Uladzislau Rezki <[email protected]>
R: Christoph Hellwig <[email protected]>
-R: Lorenzo Stoakes <[email protected]>
S: Maintained
W: http://www.linux-mm.org
@@ -24810,7 +24804,7 @@ F: drivers/net/hamradio/yam*
F: include/linux/yam.h
YAMA SECURITY MODULE
-M: Kees Cook <[email protected]>
+M: Kees Cook <[email protected]>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
F: Documentation/admin-guide/LSM/Yama.rst
diff --git a/Makefile b/Makefile
index 14427547dc1e..3d10e3aadeda 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 10
SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION =
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi
index d80440446473..05d7a462ea25 100644
--- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi
@@ -85,7 +85,7 @@
};
};
- panel {
+ panel_dpi: panel {
compatible = "sii,43wvf1g";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_display_power>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
index c84e9b052527..151e9cee3c87 100644
--- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
+++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
@@ -10,8 +10,6 @@
/plugin/;
&{/} {
- /delete-node/ panel;
-
hdmi: connector-hdmi {
compatible = "hdmi-connector";
label = "hdmi";
@@ -82,6 +80,10 @@
};
};
+&panel_dpi {
+ status = "disabled";
+};
+
&tve {
status = "disabled";
};
diff --git a/arch/arm/boot/dts/rockchip/rk3066a.dtsi b/arch/arm/boot/dts/rockchip/rk3066a.dtsi
index 30139f21de64..15cbd94d7ec0 100644
--- a/arch/arm/boot/dts/rockchip/rk3066a.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3066a.dtsi
@@ -128,6 +128,7 @@
pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>;
power-domains = <&power RK3066_PD_VIO>;
rockchip,grf = <&grf>;
+ #sound-dai-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 78282ced5038..e408399d5f0e 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -14,6 +14,7 @@
#include <asm/mach/map.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
+#include <asm/uaccess.h>
#ifdef CONFIG_EFI
void efi_init(void);
@@ -25,6 +26,18 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, boo
#define arch_efi_call_virt_setup() efi_virtmap_load()
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
+#ifdef CONFIG_CPU_TTBR0_PAN
+#undef arch_efi_call_virt
+#define arch_efi_call_virt(p, f, args...) ({ \
+ unsigned int flags = uaccess_save_and_enable(); \
+ efi_status_t res = _Generic((p)->f(args), \
+ efi_status_t: (p)->f(args), \
+ default: ((p)->f(args), EFI_ABORTED)); \
+ uaccess_restore(flags); \
+ res; \
+})
+#endif
+
#define ARCH_EFI_IRQ_FLAGS_MASK \
(PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
PSR_T_BIT | MODE_MASK)
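The `_Generic` selection above deserves a note: 32-bit ARM EFI runtime services return either `efi_status_t` or `void`, and the selector picks the matching branch at compile time while evaluating the chosen call exactly once (the controlling expression itself is never evaluated). A self-contained sketch of the same trick, with illustrative names that are not kernel code:

/* Sketch only: evaluate the call once; keep the value when it
 * yields efi_status_t, otherwise substitute EFI_ABORTED. */
#include <stdio.h>

typedef unsigned long efi_status_t;
#define EFI_ABORTED 21

static efi_status_t returns_status(void) { return 0; }
static void returns_void(void) { }

#define WRAP(call) _Generic((call),				\
	efi_status_t: (call),					\
	default:      ((call), (efi_status_t)EFI_ABORTED))

int main(void)
{
	printf("%lu\n", WRAP(returns_status()));	/* prints 0  */
	printf("%lu\n", WRAP(returns_void()));		/* prints 21 */
	return 0;
}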
diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c
index 8aa39db095d7..2c5155bd376b 100644
--- a/arch/arm/mach-davinci/pm.c
+++ b/arch/arm/mach-davinci/pm.c
@@ -61,7 +61,7 @@ static void davinci_pm_suspend(void)
/* Configure sleep count in deep sleep register */
val = __raw_readl(pm_config.deepsleep_reg);
- val &= ~DEEPSLEEP_SLEEPCOUNT_MASK,
+ val &= ~DEEPSLEEP_SLEEPCOUNT_MASK;
val |= pm_config.sleepcount;
__raw_writel(val, pm_config.deepsleep_reg);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..b6e8920364de 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1649,6 +1649,7 @@ config RODATA_FULL_DEFAULT_ENABLED
config ARM64_SW_TTBR0_PAN
bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
+ depends on !KCSAN
help
Enabling this option prevents the kernel from accessing
user-space memory directly by pointing TTBR0_EL1 to a reserved
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
index c204dd43c726..ce90327e1b2e 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
@@ -191,7 +191,7 @@
compatible = "x-powers,axp803";
reg = <0x3a3>;
interrupt-parent = <&r_intc>;
- interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_LOW>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_LOW>;
x-powers,drive-vbus-en;
vin1-supply = <&reg_vcc5v>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index 4768b05fd765..98544741ce17 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -6,6 +6,7 @@
#include <dt-bindings/phy/phy-imx8-pcie.h>
#include <dt-bindings/pwm/pwm.h>
#include "imx8mm.dtsi"
+#include "imx8mm-overdrive.dtsi"
/ {
chosen {
@@ -935,7 +936,7 @@
/* Verdin GPIO_9_DSI (pulled-up as active-low) */
pinctrl_gpio_9_dsi: gpio9dsigrp {
fsl,pins =
- <MX8MM_IOMUXC_NAND_RE_B_GPIO3_IO15 0x146>; /* SODIMM 17 */
+ <MX8MM_IOMUXC_NAND_RE_B_GPIO3_IO15 0x1c6>; /* SODIMM 17 */
};
/* Verdin GPIO_10_DSI (pulled-up as active-low) */
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
index 43f1d45ccc96..f5115f9e8c47 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
@@ -254,7 +254,7 @@
<&clk IMX8MP_CLK_CLKOUT2>,
<&clk IMX8MP_AUDIO_PLL2_OUT>;
assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>;
- assigned-clock-rates = <13000000>, <13000000>, <156000000>;
+ assigned-clock-rates = <13000000>, <13000000>, <208000000>;
reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index dec57fad6828..e2b5e7ac3e46 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -219,7 +219,7 @@
bluetooth {
compatible = "brcm,bcm4330-bt";
- shutdown-gpios = <&gpio4 16 GPIO_ACTIVE_HIGH>;
+ shutdown-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
index 5c6b39c6933f..6e05361c1ffb 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
@@ -36,7 +36,7 @@
regulator-name = "SD1_SPWR";
regulator-min-microvolt = <3000000>;
regulator-max-microvolt = <3000000>;
- gpio = <&lsio_gpio4 19 GPIO_ACTIVE_HIGH>;
+ gpio = <&lsio_gpio4 7 GPIO_ACTIVE_HIGH>;
enable-active-high;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
index d400d85f42a9..bd98eff4d685 100644
--- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
@@ -296,7 +296,6 @@
vmmc-supply = <&reg_usdhc2_vmmc>;
bus-width = <4>;
status = "okay";
- no-sdio;
no-mmc;
};
diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi
index f2a5e2e40461..f90f03fa6a24 100644
--- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi
+++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi
@@ -1459,9 +1459,23 @@
system-cache-controller@19200000 {
compatible = "qcom,qdu1000-llcc";
- reg = <0 0x19200000 0 0xd80000>,
+ reg = <0 0x19200000 0 0x80000>,
+ <0 0x19300000 0 0x80000>,
+ <0 0x19600000 0 0x80000>,
+ <0 0x19700000 0 0x80000>,
+ <0 0x19a00000 0 0x80000>,
+ <0 0x19b00000 0 0x80000>,
+ <0 0x19e00000 0 0x80000>,
+ <0 0x19f00000 0 0x80000>,
<0 0x1a200000 0 0x80000>;
reg-names = "llcc0_base",
+ "llcc1_base",
+ "llcc2_base",
+ "llcc3_base",
+ "llcc4_base",
+ "llcc5_base",
+ "llcc6_base",
+ "llcc7_base",
"llcc_broadcast_base";
interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
index 31de73594839..1b3dc0ece54d 100644
--- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi
+++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
@@ -3605,7 +3605,7 @@
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>,
- <GIC_PPI 12 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>;
+ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>;
};
pcie0: pcie@1c00000 {
diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
index 067712310560..581a70c34fd2 100644
--- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
@@ -2647,11 +2647,14 @@
system-cache-controller@9200000 {
compatible = "qcom,sc8180x-llcc";
- reg = <0 0x09200000 0 0x50000>, <0 0x09280000 0 0x50000>,
- <0 0x09300000 0 0x50000>, <0 0x09380000 0 0x50000>,
- <0 0x09600000 0 0x50000>;
+ reg = <0 0x09200000 0 0x58000>, <0 0x09280000 0 0x58000>,
+ <0 0x09300000 0 0x58000>, <0 0x09380000 0 0x58000>,
+ <0 0x09400000 0 0x58000>, <0 0x09480000 0 0x58000>,
+ <0 0x09500000 0 0x58000>, <0 0x09580000 0 0x58000>,
+ <0 0x09600000 0 0x58000>;
reg-names = "llcc0_base", "llcc1_base", "llcc2_base",
- "llcc3_base", "llcc_broadcast_base";
+ "llcc3_base", "llcc4_base", "llcc5_base",
+ "llcc6_base", "llcc7_base", "llcc_broadcast_base";
interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
index 41215567b3ae..372b35fb844f 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
@@ -977,8 +977,7 @@
reset-n-pins {
pins = "gpio99";
function = "gpio";
- output-high;
- drive-strength = <16>;
+ bias-disable;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index e937732abede..4bf99b6b6e5f 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -655,15 +655,16 @@
status = "okay";
- /* FIXME: verify */
touchscreen@10 {
- compatible = "hid-over-i2c";
+ compatible = "elan,ekth5015m", "elan,ekth6915";
reg = <0x10>;
- hid-descr-addr = <0x1>;
interrupts-extended = <&tlmm 175 IRQ_TYPE_LEVEL_LOW>;
- vdd-supply = <&vreg_misc_3p3>;
- vddl-supply = <&vreg_s10b>;
+ reset-gpios = <&tlmm 99 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>;
+ no-reset-on-power-off;
+
+ vcc33-supply = <&vreg_misc_3p3>;
+ vccio-supply = <&vreg_misc_3p3>;
pinctrl-names = "default";
pinctrl-0 = <&ts0_default>;
@@ -1496,8 +1497,8 @@
reset-n-pins {
pins = "gpio99";
function = "gpio";
- output-high;
- drive-strength = <16>;
+ drive-strength = <2>;
+ bias-disable;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
index 0549ba1fbeea..59f0a850671a 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
@@ -4623,6 +4623,8 @@
restart@c264000 {
compatible = "qcom,pshold";
reg = <0 0x0c264000 0 0x4>;
+ /* TZ seems to block access */
+ status = "reserved";
};
tsens1: thermal-sensor@c265000 {
diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi
index aca0a87092e4..9ed062150aaf 100644
--- a/arch/arm64/boot/dts/qcom/sm6115.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi
@@ -1090,6 +1090,7 @@
power-domains = <&rpmpd SM6115_VDDCX>;
operating-points-v2 = <&sdhc1_opp_table>;
+ iommus = <&apps_smmu 0x00c0 0x0>;
interconnects = <&system_noc MASTER_SDCC_1 RPM_ALWAYS_TAG
&bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>,
<&bimc MASTER_AMPSS_M0 RPM_ALWAYS_TAG
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
index c5c2895b37c7..be6b1e7d07ce 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
@@ -49,6 +49,15 @@
stdout-path = "serial0:115200n8";
};
+ reserved-memory {
+ linux,cma {
+ compatible = "shared-dma-pool";
+ size = <0x0 0x8000000>;
+ reusable;
+ linux,cma-default;
+ };
+ };
+
sound {
compatible = "qcom,x1e80100-sndcard";
model = "X1E80100-CRD";
@@ -93,7 +102,7 @@
};
codec {
- sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>;
+ sound-dai = <&wcd938x 1>, <&swr2 1>, <&lpass_txmacro 0>;
};
platform {
@@ -744,7 +753,7 @@
wcd_tx: codec@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
- qcom,tx-port-mapping = <1 1 2 3>;
+ qcom,tx-port-mapping = <2 2 3 4>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
index 2061fbe7b75a..8f67c393b871 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
@@ -23,6 +23,15 @@
stdout-path = "serial0:115200n8";
};
+ reserved-memory {
+ linux,cma {
+ compatible = "shared-dma-pool";
+ size = <0x0 0x8000000>;
+ reusable;
+ linux,cma-default;
+ };
+ };
+
vph_pwr: vph-pwr-regulator {
compatible = "regulator-fixed";
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 5f90a0b3c016..05e4d491ec18 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -2737,15 +2737,17 @@
device_type = "pci";
compatible = "qcom,pcie-x1e80100";
reg = <0 0x01bf8000 0 0x3000>,
- <0 0x70000000 0 0xf1d>,
- <0 0x70000f20 0 0xa8>,
+ <0 0x70000000 0 0xf20>,
+ <0 0x70000f40 0 0xa8>,
<0 0x70001000 0 0x1000>,
- <0 0x70100000 0 0x100000>;
+ <0 0x70100000 0 0x100000>,
+ <0 0x01bfb000 0 0x1000>;
reg-names = "parf",
"dbi",
"elbi",
"atu",
- "config";
+ "config",
+ "mhi";
#address-cells = <3>;
#size-cells = <2>;
ranges = <0x01000000 0 0x00000000 0 0x70200000 0 0x100000>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
index b47fe02c33fb..079101cddd65 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
@@ -5,6 +5,8 @@
*/
/dts-v1/;
+
+#include <dt-bindings/leds/common.h>
#include "rk3308.dtsi"
/ {
@@ -24,17 +26,21 @@
leds {
compatible = "gpio-leds";
pinctrl-names = "default";
- pinctrl-0 = <&green_led_gio>, <&heartbeat_led_gpio>;
+ pinctrl-0 = <&green_led>, <&heartbeat_led>;
green-led {
+ color = <LED_COLOR_ID_GREEN>;
default-state = "on";
+ function = LED_FUNCTION_POWER;
gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>;
label = "rockpis:green:power";
linux,default-trigger = "default-on";
};
blue-led {
+ color = <LED_COLOR_ID_BLUE>;
default-state = "on";
+ function = LED_FUNCTION_HEARTBEAT;
gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>;
label = "rockpis:blue:user";
linux,default-trigger = "heartbeat";
@@ -126,10 +132,12 @@
};
&emmc {
- bus-width = <4>;
cap-mmc-highspeed;
- mmc-hs200-1_8v;
+ cap-sd-highspeed;
+ no-sdio;
non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>;
vmmc-supply = <&vcc_io>;
status = "okay";
};
@@ -214,11 +222,11 @@
pinctrl-0 = <&rtc_32k>;
leds {
- green_led_gio: green-led-gpio {
+ green_led: green-led {
rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
};
- heartbeat_led_gpio: heartbeat-led-gpio {
+ heartbeat_led: heartbeat-led {
rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index 962ea893999b..c00da150a22f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -811,7 +811,7 @@
clocks = <&cru SCLK_I2S2_8CH_TX_OUT>,
<&cru SCLK_I2S2_8CH_RX_OUT>,
<&cru PCLK_ACODEC>;
- reset-names = "codec-reset";
+ reset-names = "codec";
resets = <&cru SRST_ACODEC_P>;
#sound-dai-cells = <0>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
index f09d60bbe6c4..a608a219543e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
@@ -241,8 +241,8 @@
rk805: pmic@18 {
compatible = "rockchip,rk805";
reg = <0x18>;
- interrupt-parent = <&gpio2>;
- interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
#clock-cells = <1>;
clock-output-names = "xin32k", "rk805-clkout2";
gpio-controller;
diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
index 734f87db4d11..73618df7a889 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
@@ -793,6 +793,7 @@
dma-names = "tx";
pinctrl-names = "default";
pinctrl-0 = <&spdif_tx>;
+ #sound-dai-cells = <0>;
status = "disabled";
};
@@ -804,6 +805,7 @@
clocks = <&cru SCLK_I2S_2CH>, <&cru HCLK_I2S_2CH>;
dmas = <&dmac_bus 6>, <&dmac_bus 7>;
dma-names = "tx", "rx";
+ #sound-dai-cells = <0>;
status = "disabled";
};
@@ -817,6 +819,7 @@
dma-names = "tx", "rx";
pinctrl-names = "default";
pinctrl-0 = <&i2s_8ch_bus>;
+ #sound-dai-cells = <0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index 789fd0dcc88b..3cd63d1e8f15 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -450,7 +450,7 @@ ap_i2c_audio: &i2c8 {
dlg,btn-cfg = <50>;
dlg,mic-det-thr = <500>;
dlg,jack-ins-deb = <20>;
- dlg,jack-det-rate = "32ms_64ms";
+ dlg,jack-det-rate = "32_64";
dlg,jack-rem-deb = <1>;
dlg,a-d-btn-thr = <0xa>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
index 26322a358d91..b908ce006c26 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
@@ -289,7 +289,7 @@
regulator-name = "vdd_gpu";
regulator-always-on;
regulator-boot-on;
- regulator-min-microvolt = <900000>;
+ regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
regulator-ramp-delay = <6001>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
index 1a604429fb26..e74871491ef5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
@@ -444,6 +444,7 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
index b4f22d95ac0e..e80caa36f8e4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
@@ -435,6 +435,7 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index b8e15b76a8a6..2e7512676b7e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -383,6 +383,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
sd-uhs-sdr104;
vmmc-supply = <&vcc_3v3_s3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
index aebe1fedd2d8..615094bb8ba3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
@@ -344,6 +344,11 @@
};
};
+&pwm0 {
+ pinctrl-0 = <&pwm0m1_pins>;
+ pinctrl-names = "default";
+};
+
&saradc {
vref-supply = <&vcc_1v8_s0>;
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index 3b2ec1d0c542..074c316a9a69 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -288,9 +288,9 @@
pinctrl-0 = <&i2c7m0_xfer>;
status = "okay";
- es8316: audio-codec@11 {
+ es8316: audio-codec@10 {
compatible = "everest,es8316";
- reg = <0x11>;
+ reg = <0x10>;
assigned-clocks = <&cru I2S0_8CH_MCLKOUT>;
assigned-clock-rates = <12288000>;
clocks = <&cru I2S0_8CH_MCLKOUT>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
index 8e2a07612d17..3b9a349362db 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
@@ -366,6 +366,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
@@ -393,6 +394,7 @@
pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc5v0_sys>;
vcc2-supply = <&vcc5v0_sys>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 57a9abe78ee4..2c7bf4da0b80 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1036,6 +1036,7 @@ CONFIG_SND_AUDIO_GRAPH_CARD2=m
CONFIG_HID_MULTITOUCH=m
CONFIG_I2C_HID_ACPI=m
CONFIG_I2C_HID_OF=m
+CONFIG_I2C_HID_OF_ELAN=m
CONFIG_USB=y
CONFIG_USB_OTG=y
CONFIG_USB_XHCI_HCD=y
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index 980d1dd8e1a3..b8a5861dc7b7 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -112,6 +112,9 @@
#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
+#define _ASM_EXTABLE_KACCESS(insn, fixup) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 9943ff0af4c9..1f60aa1bc750 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -170,6 +170,7 @@
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
+#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55)))
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
new file mode 100644
index 000000000000..be5915669d23
--- /dev/null
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#include <asm/cacheflush.h>
+
+/* Sigh. You can still run arm64 in BE mode */
+#include <asm/byteorder.h>
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("1:\t" \
+ "movz %0, #0xcdef\n\t" \
+ "movk %0, #0x89ab, lsl #16\n\t" \
+ "movk %0, #0x4567, lsl #32\n\t" \
+ "movk %0, #0x0123, lsl #48\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret; })
+
+#define runtime_const_shift_right_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "lsr %w0,%w1,#12\n\t" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"r" (0u+(val))); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
+static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
+{
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffe0001f;
+ insn |= (val & 0xffff) << 5;
+ *p = cpu_to_le32(insn);
+}
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+ unsigned long va = (unsigned long)where;
+ caches_clean_inval_pou(va, va + 4*insns);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+ __runtime_fixup_16(p+2, val >> 32);
+ __runtime_fixup_16(p+3, val >> 48);
+ __runtime_fixup_caches(where, 4);
+}
+
+/* Immediate value is 6 bits starting at bit #16 */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffc0ffff;
+ insn |= (val & 63) << 16;
+ *p = cpu_to_le32(insn);
+ __runtime_fixup_caches(where, 1);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
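For context, these fixups are driven by the `RUNTIME_CONST()` section entries added to `vmlinux.lds.S` further down in this diff. A minimal sketch of the intended call pattern, modeled on the `d_hash_shift`/`dentry_hashtable` users those entries name (the names and value below are illustrative, not part of this patch):

/* Sketch only, assuming the header above. */
static unsigned int d_hash_shift __ro_after_init;

static inline unsigned int d_hash(unsigned int hash)
{
	/* compiled as "lsr %w0, %w1, #12"; patched at boot */
	return runtime_const_shift_right_32(hash, d_hash_shift);
}

static void __init hashtable_init(void)
{
	d_hash_shift = 32 - 13;			/* illustrative value */
	/* rewrite every recorded use site with the real shift */
	runtime_const_init(shift, d_hash_shift);
}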
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 14be5000c5a0..28f665e0975a 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -184,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_mem_asm(load, reg, x, addr, err, type) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_mem_asm(load, reg, x, addr, label, type) \
+ asm_goto_output( \
+ "1: " load " " reg "0, [%1]\n" \
+ _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \
+ : "=r" (x) \
+ : "r" (addr) : : label)
+#else
+#define __get_mem_asm(load, reg, x, addr, label, type) do { \
+ int __gma_err = 0; \
asm volatile( \
"1: " load " " reg "1, [%2]\n" \
"2:\n" \
_ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
- : "+r" (err), "=r" (x) \
- : "r" (addr))
+ : "+r" (__gma_err), "=r" (x) \
+ : "r" (addr)); \
+ if (__gma_err) goto label; } while (0)
+#endif
-#define __raw_get_mem(ldr, x, ptr, err, type) \
+#define __raw_get_mem(ldr, x, ptr, label, type) \
do { \
unsigned long __gu_val; \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \
break; \
case 2: \
- __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \
break; \
case 4: \
- __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \
break; \
case 8: \
- __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \
break; \
default: \
BUILD_BUG(); \
@@ -219,27 +230,34 @@ do { \
* uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
* we must evaluate these outside of the critical section.
*/
-#define __raw_get_user(x, ptr, err) \
+#define __raw_get_user(x, ptr, label) \
do { \
__typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
__typeof__(x) __rgu_val; \
__chk_user_ptr(ptr); \
- \
- uaccess_ttbr0_enable(); \
- __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \
- uaccess_ttbr0_disable(); \
- \
- (x) = __rgu_val; \
+ do { \
+ __label__ __rgu_failed; \
+ uaccess_ttbr0_enable(); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ (x) = __rgu_val; \
+ break; \
+ __rgu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
} while (0)
#define __get_user_error(x, ptr, err) \
do { \
+ __label__ __gu_failed; \
__typeof__(*(ptr)) __user *__p = (ptr); \
might_fault(); \
if (access_ok(__p, sizeof(*__p))) { \
__p = uaccess_mask_ptr(__p); \
- __raw_get_user((x), __p, (err)); \
+ __raw_get_user((x), __p, __gu_failed); \
} else { \
+ __gu_failed: \
(x) = (__force __typeof__(x))0; (err) = -EFAULT; \
} \
} while (0)
@@ -262,40 +280,42 @@ do { \
do { \
__typeof__(dst) __gkn_dst = (dst); \
__typeof__(src) __gkn_src = (src); \
- int __gkn_err = 0; \
- \
- __mte_enable_tco_async(); \
- __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
- (__force type *)(__gkn_src), __gkn_err, K); \
- __mte_disable_tco_async(); \
+ do { \
+ __label__ __gkn_label; \
\
- if (unlikely(__gkn_err)) \
+ __mte_enable_tco_async(); \
+ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
+ (__force type *)(__gkn_src), __gkn_label, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __gkn_label: \
+ __mte_disable_tco_async(); \
goto err_label; \
+ } while (0); \
} while (0)
-#define __put_mem_asm(store, reg, x, addr, err, type) \
- asm volatile( \
- "1: " store " " reg "1, [%2]\n" \
+#define __put_mem_asm(store, reg, x, addr, label, type) \
+ asm goto( \
+ "1: " store " " reg "0, [%1]\n" \
"2:\n" \
- _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \
- : "+r" (err) \
- : "rZ" (x), "r" (addr))
+ _ASM_EXTABLE_##type##ACCESS(1b, %l2) \
+ : : "rZ" (x), "r" (addr) : : label)
-#define __raw_put_mem(str, x, ptr, err, type) \
+#define __raw_put_mem(str, x, ptr, label, type) \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
switch (sizeof(*(ptr))) { \
case 1: \
- __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \
break; \
case 2: \
- __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \
break; \
case 4: \
- __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \
break; \
case 8: \
- __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \
break; \
default: \
BUILD_BUG(); \
@@ -307,25 +327,34 @@ do { \
* uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
* we must evaluate these outside of the critical section.
*/
-#define __raw_put_user(x, ptr, err) \
+#define __raw_put_user(x, ptr, label) \
do { \
+ __label__ __rpu_failed; \
__typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
__typeof__(*(ptr)) __rpu_val = (x); \
__chk_user_ptr(__rpu_ptr); \
\
- uaccess_ttbr0_enable(); \
- __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \
- uaccess_ttbr0_disable(); \
+ do { \
+ uaccess_ttbr0_enable(); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ break; \
+ __rpu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
} while (0)
#define __put_user_error(x, ptr, err) \
do { \
+ __label__ __pu_failed; \
__typeof__(*(ptr)) __user *__p = (ptr); \
might_fault(); \
if (access_ok(__p, sizeof(*__p))) { \
__p = uaccess_mask_ptr(__p); \
- __raw_put_user((x), __p, (err)); \
+ __raw_put_user((x), __p, __pu_failed); \
} else { \
+ __pu_failed: \
(err) = -EFAULT; \
} \
} while (0)
@@ -348,15 +377,18 @@ do { \
do { \
__typeof__(dst) __pkn_dst = (dst); \
__typeof__(src) __pkn_src = (src); \
- int __pkn_err = 0; \
\
- __mte_enable_tco_async(); \
- __raw_put_mem("str", *((type *)(__pkn_src)), \
- (__force type *)(__pkn_dst), __pkn_err, K); \
- __mte_disable_tco_async(); \
- \
- if (unlikely(__pkn_err)) \
+ do { \
+ __label__ __pkn_err; \
+ __mte_enable_tco_async(); \
+ __raw_put_mem("str", *((type *)(__pkn_src)), \
+ (__force type *)(__pkn_dst), __pkn_err, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __pkn_err: \
+ __mte_disable_tco_async(); \
goto err_label; \
+ } while (0); \
} while(0)
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
@@ -381,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
__actu_ret; \
})
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr,len)))
+ return 0;
+ uaccess_ttbr0_enable();
+ return 1;
+}
+#define user_access_begin(a,b) user_access_begin(a,b)
+#define user_access_end() uaccess_ttbr0_disable()
+#define unsafe_put_user(x, ptr, label) \
+ __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U)
+#define unsafe_get_user(x, ptr, label) \
+ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
+
+/*
+ * KCSAN uses these to save and restore ttbr state.
+ * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so
+ * they are no-ops.
+ */
+static inline unsigned long user_access_save(void) { return 0; }
+static inline void user_access_restore(unsigned long enabled) { }
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
+
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
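Taken together, the conversions above make every accessor report a fault by jumping to a caller-supplied label rather than filling in an error variable. A minimal sketch of the calling pattern the new `user_access_begin()`/`unsafe_put_user()` definitions enable (the function itself is illustrative, not from this patch):

static long put_one_word(unsigned int val, unsigned int __user *up)
{
	if (!user_access_begin(up, sizeof(*up)))
		return -EFAULT;
	unsafe_put_user(val, up, efault);	/* faults jump to efault */
	user_access_end();
	return 0;

efault:
	user_access_end();
	return -EFAULT;
}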
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 266b96acc014..1386e8e751f2 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -840,7 +840,7 @@ __SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SYSCALL(__NR_io_pgetevents_time64, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 14251abee23c..824ca6987a51 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -27,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
}
#define prep_zero_mask(a, bits, c) (bits)
+#define create_zero_mask(bits) (bits)
+#define find_zero(bits) (__ffs(bits) >> 3)
-static inline unsigned long create_zero_mask(unsigned long bits)
+static inline unsigned long zero_bytemask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
return bits >> 7;
}
-static inline unsigned long find_zero(unsigned long mask)
-{
- return fls64(mask) >> 3;
-}
-
-#define zero_bytemask(mask) (mask)
-
#else /* __AARCH64EB__ */
#include <asm-generic/word-at-a-time.h>
#endif
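A worked example makes the little-endian flow concrete: `has_zero()` leaves `0x80` in each byte lane that held zero, so the first zero byte now falls out of `__ffs()` directly, with no need for the removed `create_zero_mask()`/`fls64()` detour. A standalone sketch in plain C, with the kernel helpers replaced by builtins (assumes a little-endian host, i.e. the `!__AARCH64EB__` case above):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ONES	0x0101010101010101ULL	/* c->one_bits  */
#define HIGHS	0x8080808080808080ULL	/* c->high_bits */

int main(void)
{
	const char buf[8] = { 'a', 'b', 0, 'c', 'd', 'e', 'f', 'g' };
	uint64_t a, bits, bytemask;
	unsigned int zero_byte;

	memcpy(&a, buf, 8);			/* LE load: byte 2 is the NUL */
	bits = (a - ONES) & ~a & HIGHS;		/* has_zero(): 0x80 per zero lane */
	zero_byte = __builtin_ctzll(bits) >> 3;	/* find_zero() via __ffs() */
	bytemask = ((bits - 1) & ~bits) >> 7;	/* zero_bytemask(): bytes before NUL */

	printf("bits=%#llx byte=%u mask=%#llx\n",
	       (unsigned long long)bits, zero_byte,
	       (unsigned long long)bytemask);
	/* prints: bits=0x800000 byte=2 mask=0xffff */
	return 0;
}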
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 4a92096db34e..712718aed5dd 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,6 +9,7 @@
#include <linux/efi.h>
#include <linux/init.h>
+#include <linux/kmemleak.h>
#include <linux/screen_info.h>
#include <linux/vmalloc.h>
@@ -213,6 +214,7 @@ l: if (!p) {
return -ENOMEM;
}
+ kmemleak_not_leak(p);
efi_rt_stack_top = p + THREAD_SIZE;
return 0;
}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index dcdcccd40891..6174671be7c1 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -582,12 +582,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl);
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
const char __user *end = uaddr + size;
- int err = 0;
char val;
- __raw_get_user(val, uaddr, err);
- if (err)
- return size;
+ __raw_get_user(val, uaddr, efault);
uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
while (uaddr < end) {
@@ -595,12 +592,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size)
* A read is sufficient for mte, the caller should have probed
* for the pte write permission if required.
*/
- __raw_get_user(val, uaddr, err);
- if (err)
- return end - uaddr;
+ __raw_get_user(val, uaddr, efault);
uaddr += MTE_GRANULE_SIZE;
}
(void)val;
return 0;
+
+efault:
+ return end - uaddr;
}
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index 5fa08e13e17e..f374a3e5a5fe 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -173,7 +173,7 @@ static void __init remap_idmap_for_lpa2(void)
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
- (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end);
+ (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad198262b981..7230f6e20ab8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
syscall_set_return_value(current, regs, 0, ret);
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for arm64 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 9 bits.
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte aligned SP at function boundaries, which will remove the
+ * 4 low bits from any entropy chosen here.
*
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints: the AAPCS mandates a
- * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
- *
- * The resulting 5 bits of entropy is seen in SP[8:4].
+ * The resulting 6 bits of entropy is seen in SP[9:4].
*/
- choose_random_kstack_offset(get_random_u16() & 0x1FF);
+ choose_random_kstack_offset(get_random_u16());
}
static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 755a22d4f840..55a8e310ea12 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -264,6 +264,9 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 02746f9d0980..efb053af331c 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
res);
}
+static void ffa_rx_release(struct arm_smccc_res *res)
+{
+ arm_smccc_1_1_smc(FFA_RX_RELEASE,
+ 0, 0,
+ 0, 0, 0, 0, 0,
+ res);
+}
+
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
+ ffa_rx_release(res);
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
+ ffa_rx_release(res);
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
@@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
}
ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 8f5b7a3e7009..7f68cf58b978 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -391,7 +391,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
- vgic_v3_free_redist_region(rdreg);
+ vgic_v3_free_redist_region(kvm, rdreg);
INIT_LIST_HEAD(&dist->rd_regions);
} else {
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index a3983a631b5a..9e50928f5d7d 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -919,8 +919,19 @@ free:
return ret;
}
-void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
+void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg)
{
+ struct kvm_vcpu *vcpu;
+ unsigned long c;
+
+ lockdep_assert_held(&kvm->arch.config_lock);
+
+ /* Garbage collect the region */
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (vcpu->arch.vgic_cpu.rdreg == rdreg)
+ vcpu->arch.vgic_cpu.rdreg = NULL;
+ }
+
list_del(&rdreg->list);
kfree(rdreg);
}
@@ -945,7 +956,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
mutex_lock(&kvm->arch.config_lock);
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
- vgic_v3_free_redist_region(rdreg);
+ vgic_v3_free_redist_region(kvm, rdreg);
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 6106ebd5ba42..03d356a12377 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -316,7 +316,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
u32 index);
-void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
+void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg);
bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c927e9312f10..353ea5dc32b8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -124,7 +124,8 @@ bool pgattr_change_is_safe(u64 old, u64 new)
* The following mapping attributes may be updated in live
* kernel mappings without the need for break-before-make.
*/
- pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
+ pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG |
+ PTE_SWBITS_MASK;
/* creating or taking down mappings is always safe */
if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index 7ff6a2466af1..e0594b6370a6 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -6,6 +6,7 @@
#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYNC_FILE_RANGE2
#include <asm-generic/unistd.h>
#define __NR_set_thread_area (__NR_arch_specific_syscall + 0)
diff --git a/arch/csky/kernel/syscall.c b/arch/csky/kernel/syscall.c
index 3d30e58a45d2..4540a271ee39 100644
--- a/arch/csky/kernel/syscall.c
+++ b/arch/csky/kernel/syscall.c
@@ -20,7 +20,7 @@ SYSCALL_DEFINE6(mmap2,
unsigned long, prot,
unsigned long, flags,
unsigned long, fd,
- off_t, offset)
+ unsigned long, offset)
{
if (unlikely(offset & (~PAGE_MASK >> 12)))
return -EINVAL;
diff --git a/arch/hexagon/include/asm/syscalls.h b/arch/hexagon/include/asm/syscalls.h
new file mode 100644
index 000000000000..40f2d08bec92
--- /dev/null
+++ b/arch/hexagon/include/asm/syscalls.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm-generic/syscalls.h>
+
+asmlinkage long sys_hexagon_fadvise64_64(int fd, int advice,
+ u32 a2, u32 a3, u32 a4, u32 a5);
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 432c4db1b623..21ae22306b5d 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -36,5 +36,6 @@
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYNC_FILE_RANGE2
#include <asm-generic/unistd.h>
diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c
index 0fadd582cfc7..5d98bdc494ec 100644
--- a/arch/hexagon/kernel/syscalltab.c
+++ b/arch/hexagon/kernel/syscalltab.c
@@ -14,6 +14,13 @@
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
+SYSCALL_DEFINE6(hexagon_fadvise64_64, int, fd, int, advice,
+ SC_ARG64(offset), SC_ARG64(len))
+{
+ return ksys_fadvise64_64(fd, SC_VAL64(loff_t, offset), SC_VAL64(loff_t, len), advice);
+}
+#define sys_fadvise64_64 sys_hexagon_fadvise64_64
+
void *sys_call_table[__NR_syscalls] = {
#include <asm/unistd.h>
};
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e38139c576ee..ddc042895d01 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -143,7 +143,7 @@ config LOONGARCH
select HAVE_LIVEPATCH
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
- select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
+ select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -261,6 +261,9 @@ config AS_HAS_EXPLICIT_RELOCS
config AS_HAS_FCSR_CLASS
def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)
+config AS_HAS_THIN_ADD_SUB
+ def_bool $(cc-option,-Wa$(comma)-mthin-add-sub)
+
config AS_HAS_LSX_EXTENSION
def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)
diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
index 98d60630c3d4..8b2ce5b5d43e 100644
--- a/arch/loongarch/Kconfig.debug
+++ b/arch/loongarch/Kconfig.debug
@@ -28,6 +28,7 @@ config UNWINDER_PROLOGUE
config UNWINDER_ORC
bool "ORC unwinder"
+ depends on HAVE_OBJTOOL
select OBJTOOL
help
This option enables the ORC (Oops Rewind Capability) unwinder for
diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h
index 21447fb1efc7..d78330916bd1 100644
--- a/arch/loongarch/include/asm/hw_breakpoint.h
+++ b/arch/loongarch/include/asm/hw_breakpoint.h
@@ -75,6 +75,8 @@ do { \
#define CSR_MWPC_NUM 0x3f
#define CTRL_PLV_ENABLE 0x1e
+#define CTRL_PLV0_ENABLE 0x02
+#define CTRL_PLV3_ENABLE 0x10
#define MWPnCFG3_LoadEn 8
#define MWPnCFG3_StoreEn 9
@@ -101,7 +103,7 @@ struct perf_event;
struct perf_event_attr;
extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type, int *offset);
+ int *gen_len, int *gen_type);
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
index fc55c4de2a11..621ad7634df7 100644
--- a/arch/loongarch/kernel/hw_breakpoint.c
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -174,11 +174,21 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
static int hw_breakpoint_control(struct perf_event *bp,
enum hw_breakpoint_ops ops)
{
- u32 ctrl;
+ u32 ctrl, privilege;
int i, max_slots, enable;
+ struct pt_regs *regs;
struct perf_event **slots;
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ if (arch_check_bp_in_kernelspace(info))
+ privilege = CTRL_PLV0_ENABLE;
+ else
+ privilege = CTRL_PLV3_ENABLE;
+
+ /* Whether bp belongs to a task. */
+ if (bp->hw.target)
+ regs = task_pt_regs(bp->hw.target);
+
if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
/* Breakpoint */
slots = this_cpu_ptr(bp_on_reg);
@@ -197,31 +207,38 @@ static int hw_breakpoint_control(struct perf_event *bp,
switch (ops) {
case HW_BREAKPOINT_INSTALL:
/* Set the FWPnCFG/MWPnCFG 1~4 register. */
- write_wb_reg(CSR_CFG_ADDR, i, 0, info->address);
- write_wb_reg(CSR_CFG_ADDR, i, 1, info->address);
- write_wb_reg(CSR_CFG_MASK, i, 0, info->mask);
- write_wb_reg(CSR_CFG_MASK, i, 1, info->mask);
- write_wb_reg(CSR_CFG_ASID, i, 0, 0);
- write_wb_reg(CSR_CFG_ASID, i, 1, 0);
if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
- write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+ write_wb_reg(CSR_CFG_ADDR, i, 0, info->address);
+ write_wb_reg(CSR_CFG_MASK, i, 0, info->mask);
+ write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+ write_wb_reg(CSR_CFG_CTRL, i, 0, privilege);
} else {
+ write_wb_reg(CSR_CFG_ADDR, i, 1, info->address);
+ write_wb_reg(CSR_CFG_MASK, i, 1, info->mask);
+ write_wb_reg(CSR_CFG_ASID, i, 1, 0);
ctrl = encode_ctrl_reg(info->ctrl);
- write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
+ write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | privilege);
}
enable = csr_read64(LOONGARCH_CSR_CRMD);
csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
+ if (bp->hw.target)
+ regs->csr_prmd |= CSR_PRMD_PWE;
break;
case HW_BREAKPOINT_UNINSTALL:
/* Reset the FWPnCFG/MWPnCFG 1~4 register. */
- write_wb_reg(CSR_CFG_ADDR, i, 0, 0);
- write_wb_reg(CSR_CFG_ADDR, i, 1, 0);
- write_wb_reg(CSR_CFG_MASK, i, 0, 0);
- write_wb_reg(CSR_CFG_MASK, i, 1, 0);
- write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
- write_wb_reg(CSR_CFG_CTRL, i, 1, 0);
- write_wb_reg(CSR_CFG_ASID, i, 0, 0);
- write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+ if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
+ write_wb_reg(CSR_CFG_ADDR, i, 0, 0);
+ write_wb_reg(CSR_CFG_MASK, i, 0, 0);
+ write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
+ write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+ } else {
+ write_wb_reg(CSR_CFG_ADDR, i, 1, 0);
+ write_wb_reg(CSR_CFG_MASK, i, 1, 0);
+ write_wb_reg(CSR_CFG_CTRL, i, 1, 0);
+ write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+ }
+ if (bp->hw.target)
+ regs->csr_prmd &= ~CSR_PRMD_PWE;
break;
}
@@ -283,7 +300,7 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
* to generic breakpoint descriptions.
*/
int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type, int *offset)
+ int *gen_len, int *gen_type)
{
/* Type */
switch (ctrl.type) {
@@ -303,11 +320,6 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
return -EINVAL;
}
- if (!ctrl.len)
- return -EINVAL;
-
- *offset = __ffs(ctrl.len);
-
/* Len */
switch (ctrl.len) {
case LOONGARCH_BREAKPOINT_LEN_1:
@@ -386,21 +398,17 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
struct arch_hw_breakpoint *hw)
{
int ret;
- u64 alignment_mask, offset;
+ u64 alignment_mask;
/* Build the arch_hw_breakpoint. */
ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
- if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE)
- alignment_mask = 0x7;
- else
+ if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
alignment_mask = 0x3;
- offset = hw->address & alignment_mask;
-
- hw->address &= ~alignment_mask;
- hw->ctrl.len <<= offset;
+ hw->address &= ~alignment_mask;
+ }
return 0;
}
@@ -471,12 +479,15 @@ void breakpoint_handler(struct pt_regs *regs)
slots = this_cpu_ptr(bp_on_reg);
for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) {
- bp = slots[i];
- if (bp == NULL)
- continue;
- perf_bp_event(bp, regs);
+ if ((csr_read32(LOONGARCH_CSR_FWPS) & (0x1 << i))) {
+ bp = slots[i];
+ if (bp == NULL)
+ continue;
+ perf_bp_event(bp, regs);
+ csr_write32(0x1 << i, LOONGARCH_CSR_FWPS);
+ update_bp_registers(regs, 0, 0);
+ }
}
- update_bp_registers(regs, 0, 0);
}
NOKPROBE_SYMBOL(breakpoint_handler);
@@ -488,12 +499,15 @@ void watchpoint_handler(struct pt_regs *regs)
slots = this_cpu_ptr(wp_on_reg);
for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) {
- wp = slots[i];
- if (wp == NULL)
- continue;
- perf_bp_event(wp, regs);
+ if ((csr_read32(LOONGARCH_CSR_MWPS) & (0x1 << i))) {
+ wp = slots[i];
+ if (wp == NULL)
+ continue;
+ perf_bp_event(wp, regs);
+ csr_write32(0x1 << i, LOONGARCH_CSR_MWPS);
+ update_bp_registers(regs, 0, 1);
+ }
}
- update_bp_registers(regs, 0, 1);
}
NOKPROBE_SYMBOL(watchpoint_handler);
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index c114c5ef1332..200109de1971 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -494,28 +494,14 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
struct arch_hw_breakpoint_ctrl ctrl,
struct perf_event_attr *attr)
{
- int err, len, type, offset;
+ int err, len, type;
- err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
+ err = arch_bp_generic_fields(ctrl, &len, &type);
if (err)
return err;
- switch (note_type) {
- case NT_LOONGARCH_HW_BREAK:
- if ((type & HW_BREAKPOINT_X) != type)
- return -EINVAL;
- break;
- case NT_LOONGARCH_HW_WATCH:
- if ((type & HW_BREAKPOINT_RW) != type)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
attr->bp_len = len;
attr->bp_type = type;
- attr->bp_addr += offset;
return 0;
}
@@ -609,10 +595,27 @@ static int ptrace_hbp_set_ctrl(unsigned int note_type,
return PTR_ERR(bp);
attr = bp->attr;
- decode_ctrl_reg(uctrl, &ctrl);
- err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
- if (err)
- return err;
+
+ switch (note_type) {
+ case NT_LOONGARCH_HW_BREAK:
+ ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE;
+ ctrl.len = LOONGARCH_BREAKPOINT_LEN_4;
+ break;
+ case NT_LOONGARCH_HW_WATCH:
+ decode_ctrl_reg(uctrl, &ctrl);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (uctrl & CTRL_PLV_ENABLE) {
+ err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
+ if (err)
+ return err;
+ attr.disabled = 0;
+ } else {
+ attr.disabled = 1;
+ }
return modify_user_hw_breakpoint(bp, &attr);
}
@@ -643,6 +646,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type,
struct perf_event *bp;
struct perf_event_attr attr;
+ /* Kernel-space address cannot be monitored by user-space */
+ if ((unsigned long)addr >= XKPRANGE)
+ return -EINVAL;
+
bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
if (IS_ERR(bp))
return PTR_ERR(bp);
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index b4c5acd7aa3b..8801611143ab 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -22,7 +22,7 @@
#define __SYSCALL(nr, call) [nr] = (call),
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long,
- prot, unsigned long, flags, unsigned long, fd, off_t, offset)
+ prot, unsigned long, flags, unsigned long, fd, unsigned long, offset)
{
if (offset & ~PAGE_MASK)
return -EINVAL;
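
[Note: the off_t to unsigned long change here (and in the microblaze and riscv mmap hunks below) closes a signedness hazard: off_t is signed, so a page-aligned offset with the top bit set passes the `offset & ~PAGE_MASK` check and is then arithmetic-shifted, smearing the sign bit across the page offset. A minimal user-space sketch with a hypothetical raw value:

#include <stdio.h>

int main(void)
{
	/* Hypothetical raw syscall argument with the top bit set. */
	unsigned long raw = 0x8000000000001000UL;
	long soff = (long)raw;		/* what a signed off_t parameter sees */
	unsigned long uoff = raw;	/* what an unsigned parameter sees */

	/* Right-shifting a negative value is implementation-defined in
	 * ISO C; GCC on kernel targets sign-extends (arithmetic shift). */
	printf("signed   pgoff: %lx\n", (unsigned long)(soff >> 12));	/* fff8000000000001 */
	printf("unsigned pgoff: %lx\n", uoff >> 12);			/* 8000000000001 */
	return 0;
}
]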
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index c86e099af5ca..a68573e091c0 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -761,7 +761,7 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu)
default:
ret = KVM_HCALL_INVALID_CODE;
break;
- };
+ }
kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret);
}
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 642fb80c5c4e..874fe9588773 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -71,7 +71,7 @@ static void nfhd_submit_bio(struct bio *bio)
len = bvec.bv_len;
len >>= 9;
nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
- page_to_phys(bvec.bv_page) + bvec.bv_offset);
+ bvec_phys(&bvec));
sec += len;
}
bio_endio(bio);
@@ -98,6 +98,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
{
struct queue_limits lim = {
.logical_block_size = bsize,
+ .features = BLK_FEAT_ROTATIONAL,
};
struct nfhd_device *dev;
int dev_id = id - NFHD_DEV_OFFSET;
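
[Note: two independent cleanups land in this file — the rotational hint moves into queue_limits.features, and the open-coded segment address math becomes the bvec_phys() helper. A sketch of what such a helper presumably reduces to, given the usual bio_vec layout:

/* Sketch: bvec_phys() presumably centralizes exactly this math --
 * physical address of the page plus the offset into it. */
static inline phys_addr_t bvec_phys_sketch(const struct bio_vec *bvec)
{
	return page_to_phys(bvec->bv_page) + bvec->bv_offset;
}
]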
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index ed9f34da1a2a..0850b099f300 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -35,7 +35,7 @@
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags, unsigned long, fd,
- off_t, pgoff)
+ unsigned long, pgoff)
{
if (pgoff & ~PAGE_MASK)
return -EINVAL;
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index ec180ab92eaa..66a8ba19c287 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -110,7 +110,8 @@ static void bcm6358_quirks(void)
* RAC flush causes kernel panics on BCM6358 when booting from TP1
* because the bootloader is not initializing it properly.
*/
- bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31));
+ bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)) ||
+ !!BMIPS_GET_CBR();
}
static void bcm6368_quirks(void)
diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h
index 30e86861c206..b1ee3c48e84b 100644
--- a/arch/mips/include/asm/mipsmtregs.h
+++ b/arch/mips/include/asm/mipsmtregs.h
@@ -322,7 +322,7 @@ static inline void ehb(void)
" .set push \n" \
" .set "MIPS_ISA_LEVEL" \n" \
_ASM_SET_MFTC0 \
- " mftc0 $1, " #rt ", " #sel " \n" \
+ " mftc0 %0, " #rt ", " #sel " \n" \
_ASM_UNSET_MFTC0 \
" .set pop \n" \
: "=r" (__res)); \
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cc869f5d5693..953f5b7dc723 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -354,7 +354,7 @@
412 n32 utimensat_time64 sys_utimensat
413 n32 pselect6_time64 compat_sys_pselect6_time64
414 n32 ppoll_time64 compat_sys_ppoll_time64
-416 n32 io_pgetevents_time64 sys_io_pgetevents
+416 n32 io_pgetevents_time64 compat_sys_io_pgetevents_time64
417 n32 recvmmsg_time64 compat_sys_recvmmsg_time64
418 n32 mq_timedsend_time64 sys_mq_timedsend
419 n32 mq_timedreceive_time64 sys_mq_timedreceive
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 008ebe60263e..2439a2491cff 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -27,7 +27,7 @@
17 o32 break sys_ni_syscall
# 18 was sys_stat
18 o32 unused18 sys_ni_syscall
-19 o32 lseek sys_lseek
+19 o32 lseek sys_lseek compat_sys_lseek
20 o32 getpid sys_getpid
21 o32 mount sys_mount
22 o32 umount sys_oldumount
@@ -403,7 +403,7 @@
412 o32 utimensat_time64 sys_utimensat sys_utimensat
413 o32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 o32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 o32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 o32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 o32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 o32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 o32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
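
[Note: most of these table edits share one pattern: a 32-bit task must enter through a compat_sys_* stub that re-widens or re-translates its arguments, rather than the native handler. The lseek row is the simplest illustration, since a 32-bit off_t of -1 arrives zero-extended in a 64-bit register. A hedged sketch of what such a stub does (not the actual fs/read_write.c code; do_seek is a hypothetical helper):

/* Sketch: why lseek needs a compat entry on 64-bit kernels. */
long compat_lseek_sketch(unsigned int fd, u32 offset, unsigned int whence)
{
	/* Sign-extend the caller's 32-bit off_t before use; without
	 * this, (off_t)-1 from a 32-bit task becomes 0xffffffff. */
	loff_t off = (loff_t)(s32)offset;

	return do_seek(fd, off, whence);	/* hypothetical helper */
}
]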
diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c
index 874ed6df9768..34b9323bdabb 100644
--- a/arch/mips/pci/ops-rc32434.c
+++ b/arch/mips/pci/ops-rc32434.c
@@ -112,8 +112,8 @@ retry:
* gives them time to settle
*/
if (where == PCI_VENDOR_ID) {
- if (ret == 0xffffffff || ret == 0x00000000 ||
- ret == 0x0000ffff || ret == 0xffff0000) {
+ if (*val == 0xffffffff || *val == 0x00000000 ||
+ *val == 0x0000ffff || *val == 0xffff0000) {
if (delay > 4)
return 0;
delay *= 2;
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index daafeb20f993..dc9b902de8ea 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,6 +16,7 @@ config PARISC
select ARCH_HAS_UBSAN
select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN
+ select ARCH_SPLIT_ARG64 if !64BIT
select ARCH_SUPPORTS_HUGETLBFS if PA20
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_STACKWALK
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 2a12a547b447..826c8e51b585 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -23,12 +23,3 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23,
current->comm, current->pid, r20);
return -ENOSYS;
}
-
-asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags,
- compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd,
- const char __user * pathname)
-{
- return sys_fanotify_mark(fanotify_fd, flags,
- ((__u64)mask1 << 32) | mask0,
- dfd, pathname);
-}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index b13c21373974..66dc406b12e4 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -108,7 +108,7 @@
95 common fchown sys_fchown
96 common getpriority sys_getpriority
97 common setpriority sys_setpriority
-98 common recv sys_recv
+98 common recv sys_recv compat_sys_recv
99 common statfs sys_statfs compat_sys_statfs
100 common fstatfs sys_fstatfs compat_sys_fstatfs
101 common stat64 sys_stat64
@@ -135,7 +135,7 @@
120 common clone sys_clone_wrapper
121 common setdomainname sys_setdomainname
122 common sendfile sys_sendfile compat_sys_sendfile
-123 common recvfrom sys_recvfrom
+123 common recvfrom sys_recvfrom compat_sys_recvfrom
124 32 adjtimex sys_adjtimex_time32
124 64 adjtimex sys_adjtimex
125 common mprotect sys_mprotect
@@ -364,7 +364,7 @@
320 common accept4 sys_accept4
321 common prlimit64 sys_prlimit64
322 common fanotify_init sys_fanotify_init
-323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark
+323 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
324 32 clock_adjtime sys_clock_adjtime32
324 64 clock_adjtime sys_clock_adjtime
325 common name_to_handle_at sys_name_to_handle_at
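
[Note: the deleted sys32_fanotify_mark wrapper documents the underlying ABI problem — a 64-bit mask from a 32-bit caller is delivered as two 32-bit argument slots. With ARCH_SPLIT_ARG64 selected in the Kconfig hunk above, the generic compat machinery now performs the merge the wrapper did by hand:

/* The reassembly the deleted wrapper performed; the half order is
 * ABI-specific (the parisc wrapper treated mask1 as the high word). */
static inline u64 merge_arg64(u32 lo, u32 hi)
{
	return ((u64)hi << 32) | lo;
}
]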
diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
index e1094f08f713..e9fe73aac8b6 100644
--- a/arch/powerpc/crypto/.gitignore
+++ b/arch/powerpc/crypto/.gitignore
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
aesp10-ppc.S
+aesp8-ppc.S
ghashp10-ppc.S
+ghashp8-ppc.S
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d1030bc52564..d283d281d28e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
struct eeh_dev *edev;
struct pci_dev *pdev;
+ struct pci_bus *bus = NULL;
if (pe->type & EEH_PE_PHB)
return pe->phb->bus;
@@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
/* Retrieve the parent PCI bus of first (top) PCI device */
edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+ pci_lock_rescan_remove();
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
- return pdev->bus;
+ bus = pdev->bus;
+ pci_unlock_rescan_remove();
- return NULL;
+ return bus;
}
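
[Note: the EEH change is a lock-scoped snapshot — pdev->bus is only meaningful while the PCI rescan/remove lock is held, so the pointer is captured inside the critical section and returned after it. Reduced to its shape:

/* Sketch of the capture pattern used above: read the racy pointer
 * only under the lock, return the snapshot afterwards. */
static struct pci_bus *bus_snapshot(struct pci_dev *pdev)
{
	struct pci_bus *bus = NULL;

	pci_lock_rescan_remove();
	if (pdev)
		bus = pdev->bus;
	pci_unlock_rescan_remove();

	return bus;	/* may still age once the lock is dropped */
}
]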
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4690c219bfa4..63432a33ec49 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -647,8 +647,9 @@ __after_prom_start:
* Note: This process overwrites the OF exception vectors.
*/
LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
- mr. r4,r26 /* In some cases the loader may */
- beq 9f /* have already put us at zero */
+ mr r4,r26 /* Load the virtual source address into r4 */
+ cmpld r3,r4 /* Check if source == dest */
+ beq 9f /* If so skip the copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 3656f1ca7a21..ebae8415dfbb 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -230,8 +230,10 @@
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
179 64 pread64 sys_pread64
+179 spu pread64 sys_pread64
180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
180 64 pwrite64 sys_pwrite64
+180 spu pwrite64 sys_pwrite64
181 common chown sys_chown
182 common getcwd sys_getcwd
183 common capget sys_capget
@@ -246,6 +248,7 @@
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
191 64 readahead sys_readahead
+191 spu readahead sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
@@ -293,6 +296,7 @@
232 nospu set_tid_address sys_set_tid_address
233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
233 64 fadvise64 sys_fadvise64
+233 spu fadvise64 sys_fadvise64
234 nospu exit_group sys_exit_group
235 nospu lookup_dcookie sys_ni_syscall
236 common epoll_create sys_epoll_create
@@ -502,7 +506,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 85050be08a23..72b12bc10f90 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -27,6 +27,7 @@
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
+#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/svm.h>
@@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
+#ifdef CONFIG_PPC_PSERIES
+ /*
+ * This must be done after other CPUs have shut down, otherwise they
+ * could execute the 'scv' instruction, which is not supported with
+ * reloc disabled (see configure_exceptions()).
+ */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+#endif
+
printk("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index b569ebaa590e..3ff3de9a52ac 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -130,14 +130,16 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
}
rcu_read_unlock();
- fdput(f);
-
- if (!found)
+ if (!found) {
+ fdput(f);
return -EINVAL;
+ }
table_group = iommu_group_get_iommudata(grp);
- if (WARN_ON(!table_group))
+ if (WARN_ON(!table_group)) {
+ fdput(f);
return -EFAULT;
+ }
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
struct iommu_table *tbltmp = table_group->tables[i];
@@ -158,8 +160,10 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
break;
}
}
- if (!tbl)
+ if (!tbl) {
+ fdput(f);
return -EINVAL;
+ }
rcu_read_lock();
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -170,6 +174,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
/* stit is being destroyed */
iommu_tce_table_put(tbl);
rcu_read_unlock();
+ fdput(f);
return -ENOTTY;
}
/*
@@ -177,6 +182,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
* its KVM reference counter and can return.
*/
rcu_read_unlock();
+ fdput(f);
return 0;
}
rcu_read_unlock();
@@ -184,6 +190,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
stit = kzalloc(sizeof(*stit), GFP_KERNEL);
if (!stit) {
iommu_tce_table_put(tbl);
+ fdput(f);
return -ENOMEM;
}
@@ -192,6 +199,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
list_add_rcu(&stit->next, &stt->iommu_tables);
+ fdput(f);
return 0;
}
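
[Note: every early return in this function leaked the file reference taken by the initial fdget(); the fix adds the matching fdput() to each exit. An equivalent single-exit structure, sketched with hypothetical helpers, shows the shape many kernel functions use to avoid this class of leak:

/* Sketch only: single-exit variant of the same fix. */
static long attach_sketch(struct kvm *kvm, int tablefd)
{
	struct fd f = fdget(tablefd);
	long ret = -EBADF;

	if (!f.file)
		return ret;

	ret = -EINVAL;
	if (!find_group(kvm, f.file))	/* hypothetical */
		goto out_fdput;

	ret = do_attach(kvm, f.file);	/* hypothetical */
out_fdput:
	fdput(f);
	return ret;
}
]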
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 096d09ed89f6..431be156ca9b 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
} else
xics_kexec_teardown_cpu(secondary);
}
-
-void pseries_machine_kexec(struct kimage *image)
-{
- if (firmware_has_feature(FW_FEATURE_SET_MODE))
- pseries_disable_reloc_on_exc();
-
- default_machine_kexec(image);
-}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index bba4ad192b0f..3968a6970fa8 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { }
#endif
extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
-void pseries_machine_kexec(struct kimage *image);
extern void pSeries_final_fixup(void);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 284a6fa04b0c..b10a25325238 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void)
{
void (*ctor)(void *) = get_dtl_cache_ctor();
- dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, ctor);
+ dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -1159,7 +1159,6 @@ define_machine(pseries) {
.machine_check_exception = pSeries_machine_check_exception,
.machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
- .machine_kexec = pseries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
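
[Note: the dtl cache switch to kmem_cache_create_usercopy() whitelists the dispatch-log objects for hardened-usercopy checks; the two inserted arguments give the offset and length of the region user space may copy. Annotated by parameter (as I read the slab API), the call in the hunk is:

/* kmem_cache_create_usercopy(name, size, align, flags,
 *			      useroffset, usersize, ctor):
 * the whole object is whitelisted (offset 0, length
 * DISPATCH_LOG_BYTES) for copy_to_user() under
 * CONFIG_HARDENED_USERCOPY. */
dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES,
				       DISPATCH_LOG_BYTES, 0,
				       0, DISPATCH_LOG_BYTES, ctor);
]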
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
index 8df4cf3656f2..a7d753b6fdfd 100644
--- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -15,6 +15,10 @@
model = "Kendryte KD233";
compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -46,7 +50,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -118,6 +121,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&spi0 {
@@ -125,6 +129,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "canaan,kd233-tft", "ilitek,ili9341";
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index f87c5164d9cf..4f5d40fa1e77 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -16,13 +16,6 @@
#size-cells = <1>;
compatible = "canaan,kendryte-k210";
- aliases {
- serial0 = &uarths0;
- serial1 = &uart1;
- serial2 = &uart2;
- serial3 = &uart3;
- };
-
/*
* The K210 has an sv39 MMU following the privileged specification v1.9.
* Since this is a non-ratified draft specification, the kernel does not
@@ -137,6 +130,7 @@
reg = <0x38000000 0x1000>;
interrupts = <33>;
clocks = <&sysclk K210_CLK_CPU>;
+ status = "disabled";
};
gpio0: gpio-controller@38001000 {
@@ -152,6 +146,7 @@
<62>, <63>, <64>, <65>;
gpio-controller;
ngpios = <32>;
+ status = "disabled";
};
dmac0: dma-controller@50000000 {
@@ -187,6 +182,7 @@
<&sysclk K210_CLK_GPIO>;
clock-names = "bus", "db";
resets = <&sysrst K210_RST_GPIO>;
+ status = "disabled";
gpio1_0: gpio-port@0 {
#gpio-cells = <2>;
@@ -214,6 +210,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
uart2: serial@50220000 {
@@ -230,6 +227,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
uart3: serial@50230000 {
@@ -246,6 +244,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
spi2: spi@50240000 {
@@ -259,6 +258,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ssi_clk", "pclk";
resets = <&sysrst K210_RST_SPI2>;
+ status = "disabled";
};
i2s0: i2s@50250000 {
@@ -268,6 +268,7 @@
clocks = <&sysclk K210_CLK_I2S0>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S0>;
+ status = "disabled";
};
i2s1: i2s@50260000 {
@@ -277,6 +278,7 @@
clocks = <&sysclk K210_CLK_I2S1>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S1>;
+ status = "disabled";
};
i2s2: i2s@50270000 {
@@ -286,6 +288,7 @@
clocks = <&sysclk K210_CLK_I2S2>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S2>;
+ status = "disabled";
};
i2c0: i2c@50280000 {
@@ -296,6 +299,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C0>;
+ status = "disabled";
};
i2c1: i2c@50290000 {
@@ -306,6 +310,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C1>;
+ status = "disabled";
};
i2c2: i2c@502a0000 {
@@ -316,6 +321,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C2>;
+ status = "disabled";
};
fpioa: pinmux@502b0000 {
@@ -464,6 +470,7 @@
reset-names = "spi";
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
spi1: spi@53000000 {
@@ -479,6 +486,7 @@
reset-names = "spi";
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
spi3: spi@54000000 {
@@ -495,6 +503,7 @@
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
};
};
diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts
index 396c8ca4d24d..5734cc03753b 100644
--- a/arch/riscv/boot/dts/canaan/k210_generic.dts
+++ b/arch/riscv/boot/dts/canaan/k210_generic.dts
@@ -15,6 +15,10 @@
model = "Kendryte K210 generic";
compatible = "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -24,7 +28,6 @@
&fpioa {
pinctrl-0 = <&jtag_pins>;
pinctrl-names = "default";
- status = "okay";
jtag_pins: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index 6d25bf07481a..2ab376d609d2 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -17,6 +17,10 @@
compatible = "sipeed,maix-bit", "sipeed,maix-bitm",
"canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -58,7 +62,6 @@
&fpioa {
pinctrl-names = "default";
pinctrl-0 = <&jtag_pinctrl>;
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -156,6 +159,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -170,6 +174,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -199,6 +204,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index f4f4d8d5e8b8..d98e20775c07 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -17,6 +17,10 @@
compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w",
"canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -63,7 +67,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -159,6 +162,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -173,6 +177,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 0d86df47e1ed..79ecd549700a 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -16,6 +16,10 @@
model = "SiPeed MAIX GO";
compatible = "sipeed,maix-go", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -69,7 +73,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -167,6 +170,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -181,6 +185,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -209,6 +214,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index 5c05c498e2b8..019c03ae51f6 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -15,6 +15,10 @@
model = "SiPeed MAIXDUINO";
compatible = "sipeed,maixduino", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -39,8 +43,6 @@
};
&fpioa {
- status = "okay";
-
uarths_pinctrl: uarths-pinmux {
pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */
<K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */
@@ -132,6 +134,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -146,6 +149,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -174,6 +178,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts
index cd013588adc0..375ff2661b6e 100644
--- a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts
+++ b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts
@@ -45,6 +45,7 @@
no-1-8-v;
no-mmc;
no-sdio;
+ disable-wp;
};
&uart0 {
diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
index 8ff6ea64f048..68d16717db8c 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
@@ -244,7 +244,7 @@
regulator-boot-on;
regulator-always-on;
regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
regulator-name = "emmc_vdd";
};
};
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 06e439eeef9a..09fde95a5e8f 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -145,7 +145,7 @@
/* parts of opcode for RVF, RVD and RVQ */
#define RVFDQ_FL_FS_WIDTH_OFF 12
-#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0)
+#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0)
#define RVFDQ_FL_FS_WIDTH_W 2
#define RVFDQ_FL_FS_WIDTH_D 3
#define RVFDQ_LS_FS_WIDTH_Q 4
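
[Note: the GENMASK shrink is a real correctness fix — the width field of the F/D/Q load/store encodings is 3 bits at 14:12, so after shifting right by RVFDQ_FL_FS_WIDTH_OFF only GENMASK(2, 0) should survive; GENMASK(3, 0) also captured bit 15, the low bit of rs1. The intended extraction:

/* Extract the 3-bit width field (bits 14:12) of an RV F/D/Q
 * load/store instruction word. */
static inline u32 fl_fs_width(u32 insn)
{
	return (insn >> RVFDQ_FL_FS_WIDTH_OFF) & RVFDQ_FL_FS_WIDTH_MASK;
}
]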
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 87cbd86576b2..4b95c574fd04 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
mutex_unlock(&text_mutex);
- if (!mod)
- local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE);
-
return out;
}
@@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data)
} else {
while (atomic_read(&param->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return 0;
}
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ed9cad20c039..3c830a6f7ef4 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void)
for_each_irq_desc(i, desc) {
struct irq_chip *chip;
- int ret;
chip = irq_desc_get_chip(desc);
if (!chip)
continue;
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
+ if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
chip->irq_eoi(&desc->irq_data);
if (chip->irq_mask)
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 4007563fb607..ab03732d06c4 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
memset(waddr, c, len);
+ /*
+ * We could have just patched a function that is about to be
+	 * called, so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
ret = copy_to_kernel_nofault(waddr, insn, len);
+ /*
+ * We could have just patched a function that is about to be
+	 * called, so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
ret = patch_insn_set(tp, c, len);
- if (!ret)
- flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_set_nosync);
@@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
ret = patch_insn_write(tp, insns, len);
- if (!ret)
- flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_nosync);
@@ -253,9 +263,9 @@ static int patch_text_cb(void *data)
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return ret;
}
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 528ec7cc9a62..10e311b2759d 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int graph_idx = 0;
int level = 0;
if (regs) {
@@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
pc = regs->ra;
} else {
fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
if (pc == (unsigned long)ret_from_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
@@ -156,7 +157,7 @@ unsigned long __get_wchan(struct task_struct *task)
return pc;
}
-noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 64155323cc92..d77afe05578f 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -23,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
#ifdef CONFIG_64BIT
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
@@ -32,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
/*
* Note that the shift for mmap2 is constant (12),
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 04db1f993c47..bcf41d6e0df0 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
- pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
+ pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 48ef5fe5c08a..5a36d5538dae 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -170,11 +170,14 @@ static void kaslr_adjust_got(unsigned long offset)
u64 *entry;
/*
- * Even without -fPIE, Clang still uses a global offset table for some
- * reason. Adjust the GOT entries.
+ * Adjust GOT entries, except for ones for undefined weak symbols
+ * that resolved to zero. This also skips the first three reserved
+ * entries on s390x that are zero.
*/
- for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
- *entry += offset - __START_KERNEL;
+ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+ if (*entry)
+ *entry += offset - __START_KERNEL;
+ }
}
/*
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 8c4adece8911..f3602414a961 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -601,17 +601,16 @@ CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 6dd11d3b6aaa..d0d8925fdf09 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -592,17 +592,16 @@ CONFIG_WATCHDOG_CORE=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 7f5004065e8a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void)
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
- choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+ choose_random_kstack_offset(get_tod_clock_fast());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 95990461888f..9281063636a7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -427,6 +427,7 @@ struct kvm_vcpu_stat {
u64 instruction_io_other;
u64 instruction_lpsw;
u64 instruction_lpswe;
+ u64 instruction_lpswey;
u64 instruction_pfmf;
u64 instruction_ptff;
u64 instruction_sck;
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..50cbcbbaa03d 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -38,33 +38,6 @@
#include "entry.h"
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd0fee24ad10..01071182763e 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 82e9631cd9ef..54b5b2565df8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_io_other),
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+ STATS_DESC_COUNTER(VCPU, instruction_lpswey),
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
STATS_DESC_COUNTER(VCPU, instruction_ptff),
STATS_DESC_COUNTER(VCPU, instruction_sck),
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 111eb5c74784..bf8534218af3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base1 = vcpu->arch.sie_block->ipb >> 28;
+ s64 disp1;
+
+ /* The displacement is a 20bit _SIGNED_ value */
+ disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19);
+
+ if (ar)
+ *ar = base1;
+
+ return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+}
+
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
u64 *address1, u64 *address2,
u8 *ar_b1, u8 *ar_b2)
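
[Note: the new SIY helper stitches the displacement from two instruction fields — DL (12 bits, ipb bits 16-27) and DH (8 bits, ipb bits 8-15, shifted into bits 12-19) — then sign-extends the 20-bit result from bit 19. A worked sketch of that last step:

/* Sketch: 20-bit sign extension. With DH = 0xff and DL = 0xffc the
 * raw value is 0xffffc, which extends to -4. */
static inline s64 sext20(u64 v)
{
	return (s64)(v << 44) >> 44;	/* keep bits 19:0, copy bit 19 up */
}
]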
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1be19cc9d73c..1a49b89706f8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
return 0;
}
+static int handle_lpswey(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpswey++;
+
+ if (!test_kvm_facility(vcpu->kvm, 193))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ return 0;
+}
+
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 stidp_data = vcpu->kvm->arch.model.cpuid;
@@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
case 0x61:
case 0x62:
return handle_ri(vcpu);
+ case 0x71:
+ return handle_lpswey(vcpu);
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index abb629d7e131..7e3e767ab87d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
@@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val)
static void base_crst_free(unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
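
[Note: both s390 table-free paths gain the same guard, giving them the NULL-tolerant contract kfree() already honors, so error paths can free unconditionally. Caller-side, with hypothetical alloc/free names, that enables the usual unwind pattern:

/* Sketch: with NULL-tolerant frees, one unwind path covers all
 * partial-allocation failures (alloc/free names hypothetical). */
static int setup_tables(void)
{
	unsigned long *a = table_alloc(), *b = NULL;
	int ret = -ENOMEM;

	if (!a)
		goto out;
	b = table_alloc();
	if (!b)
		goto out;
	return 0;
out:
	table_free(a);	/* safe even when NULL */
	table_free(b);
	return ret;
}
]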
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index ff8f24854c64..0ef83b6ac0db 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused)
union zpci_sic_iib iib = {{0}};
union zpci_sic_iib ziib = {{0}};
- iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+ iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index 9dca568509a5..d6f4afcb0e87 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -59,3 +59,14 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
(u64)len0 << 32 | len1, advice);
#endif
}
+
+/*
+ * swap the arguments the way that libc wants them instead of
+ * moving flags ahead of the 64-bit nbytes argument
+ */
+SYSCALL_DEFINE6(sh_sync_file_range6, int, fd, SC_ARG64(offset),
+ SC_ARG64(nbytes), unsigned int, flags)
+{
+ return ksys_sync_file_range(fd, SC_VAL64(loff_t, offset),
+ SC_VAL64(loff_t, nbytes), flags);
+}
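
[Note: SC_ARG64()/SC_VAL64() package the 32-bit calling-convention detail that a 64-bit syscall argument travels as a pair of 32-bit registers, in an endian-dependent order — the same split the parisc ARCH_SPLIT_ARG64 hunk above relies on. A paraphrased sketch of the idea (macro bodies illustrative, not copied from include/linux/syscalls.h):

/* Illustrative only: declare a 64-bit parameter as two u32 halves
 * and reassemble it at the call site. */
#define MY_ARG64(name)		u32, name##_lo, u32, name##_hi
#define MY_VAL64(type, name)	((type)name##_hi << 32 | name##_lo)
]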
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index bbf83a2db986..c55fd7696d40 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -321,7 +321,7 @@
311 common set_robust_list sys_set_robust_list
312 common get_robust_list sys_get_robust_list
313 common splice sys_splice
-314 common sync_file_range sys_sync_file_range
+314 common sync_file_range sys_sh_sync_file_range6
315 common tee sys_tee
316 common vmsplice sys_vmsplice
317 common move_pages sys_move_pages
@@ -395,6 +395,7 @@
385 common pkey_alloc sys_pkey_alloc
386 common pkey_free sys_pkey_free
387 common rseq sys_rseq
+388 common sync_file_range2 sys_sync_file_range2
# room for arch specific syscalls
393 common semget sys_semget
394 common semctl sys_semctl
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index a45f0f31fe51..a3d308f2043e 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -18,224 +18,3 @@ sys32_mmap2:
sethi %hi(sys_mmap), %g1
jmpl %g1 + %lo(sys_mmap), %g0
sllx %o5, 12, %o5
-
- .align 32
- .globl sys32_socketcall
-sys32_socketcall: /* %o0=call, %o1=args */
- cmp %o0, 1
- bl,pn %xcc, do_einval
- cmp %o0, 18
- bg,pn %xcc, do_einval
- sub %o0, 1, %o0
- sllx %o0, 5, %o0
- sethi %hi(__socketcall_table_begin), %g2
- or %g2, %lo(__socketcall_table_begin), %g2
- jmpl %g2 + %o0, %g0
- nop
-do_einval:
- retl
- mov -EINVAL, %o0
-
- .align 32
-__socketcall_table_begin:
-
- /* Each entry is exactly 32 bytes. */
-do_sys_socket: /* sys_socket(int, int, int) */
-1: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_socket), %g1
-2: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_socket), %g0
-3: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */
-4: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_bind), %g1
-5: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_bind), %g0
-6: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */
-7: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_connect), %g1
-8: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_connect), %g0
-9: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_listen: /* sys_listen(int, int) */
-10: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_listen), %g1
- jmpl %g1 + %lo(sys_listen), %g0
-11: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
- nop
-do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */
-12: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_accept), %g1
-13: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_accept), %g0
-14: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */
-15: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getsockname), %g1
-16: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_getsockname), %g0
-17: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */
-18: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getpeername), %g1
-19: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_getpeername), %g0
-20: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */
-21: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_socketpair), %g1
-22: ldswa [%o1 + 0x8] %asi, %o2
-23: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_socketpair), %g0
-24: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */
-25: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_send), %g1
-26: lduwa [%o1 + 0x8] %asi, %o2
-27: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_send), %g0
-28: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */
-29: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_recv), %g1
-30: lduwa [%o1 + 0x8] %asi, %o2
-31: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_recv), %g0
-32: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */
-33: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_sendto), %g1
-34: lduwa [%o1 + 0x8] %asi, %o2
-35: lduwa [%o1 + 0xc] %asi, %o3
-36: lduwa [%o1 + 0x10] %asi, %o4
-37: ldswa [%o1 + 0x14] %asi, %o5
- jmpl %g1 + %lo(sys_sendto), %g0
-38: lduwa [%o1 + 0x4] %asi, %o1
-do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */
-39: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_recvfrom), %g1
-40: lduwa [%o1 + 0x8] %asi, %o2
-41: lduwa [%o1 + 0xc] %asi, %o3
-42: lduwa [%o1 + 0x10] %asi, %o4
-43: lduwa [%o1 + 0x14] %asi, %o5
- jmpl %g1 + %lo(sys_recvfrom), %g0
-44: lduwa [%o1 + 0x4] %asi, %o1
-do_sys_shutdown: /* sys_shutdown(int, int) */
-45: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_shutdown), %g1
- jmpl %g1 + %lo(sys_shutdown), %g0
-46: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
- nop
-do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */
-47: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_setsockopt), %g1
-48: ldswa [%o1 + 0x8] %asi, %o2
-49: lduwa [%o1 + 0xc] %asi, %o3
-50: ldswa [%o1 + 0x10] %asi, %o4
- jmpl %g1 + %lo(sys_setsockopt), %g0
-51: ldswa [%o1 + 0x4] %asi, %o1
- nop
-do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */
-52: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getsockopt), %g1
-53: ldswa [%o1 + 0x8] %asi, %o2
-54: lduwa [%o1 + 0xc] %asi, %o3
-55: lduwa [%o1 + 0x10] %asi, %o4
- jmpl %g1 + %lo(sys_getsockopt), %g0
-56: ldswa [%o1 + 0x4] %asi, %o1
- nop
-do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */
-57: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(compat_sys_sendmsg), %g1
-58: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(compat_sys_sendmsg), %g0
-59: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */
-60: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(compat_sys_recvmsg), %g1
-61: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(compat_sys_recvmsg), %g0
-62: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
-63: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_accept4), %g1
-64: lduwa [%o1 + 0x8] %asi, %o2
-65: ldswa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_accept4), %g0
-66: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-
- .section __ex_table,"a"
- .align 4
- .word 1b, __retl_efault, 2b, __retl_efault
- .word 3b, __retl_efault, 4b, __retl_efault
- .word 5b, __retl_efault, 6b, __retl_efault
- .word 7b, __retl_efault, 8b, __retl_efault
- .word 9b, __retl_efault, 10b, __retl_efault
- .word 11b, __retl_efault, 12b, __retl_efault
- .word 13b, __retl_efault, 14b, __retl_efault
- .word 15b, __retl_efault, 16b, __retl_efault
- .word 17b, __retl_efault, 18b, __retl_efault
- .word 19b, __retl_efault, 20b, __retl_efault
- .word 21b, __retl_efault, 22b, __retl_efault
- .word 23b, __retl_efault, 24b, __retl_efault
- .word 25b, __retl_efault, 26b, __retl_efault
- .word 27b, __retl_efault, 28b, __retl_efault
- .word 29b, __retl_efault, 30b, __retl_efault
- .word 31b, __retl_efault, 32b, __retl_efault
- .word 33b, __retl_efault, 34b, __retl_efault
- .word 35b, __retl_efault, 36b, __retl_efault
- .word 37b, __retl_efault, 38b, __retl_efault
- .word 39b, __retl_efault, 40b, __retl_efault
- .word 41b, __retl_efault, 42b, __retl_efault
- .word 43b, __retl_efault, 44b, __retl_efault
- .word 45b, __retl_efault, 46b, __retl_efault
- .word 47b, __retl_efault, 48b, __retl_efault
- .word 49b, __retl_efault, 50b, __retl_efault
- .word 51b, __retl_efault, 52b, __retl_efault
- .word 53b, __retl_efault, 54b, __retl_efault
- .word 55b, __retl_efault, 56b, __retl_efault
- .word 57b, __retl_efault, 58b, __retl_efault
- .word 59b, __retl_efault, 60b, __retl_efault
- .word 61b, __retl_efault, 62b, __retl_efault
- .word 63b, __retl_efault, 64b, __retl_efault
- .word 65b, __retl_efault, 66b, __retl_efault
- .previous
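
[Note: dropping the hand-written assembly dispatcher in favor of the generic compat_sys_socketcall keeps the semantics but moves them into C — copy the packed 32-bit argument words from user memory with a bounds-checked copy, then dispatch on the call number. A much-simplified sketch of that shape (helpers hypothetical; the real net/compat.c code also sizes the copy per call):

static long socketcall_sketch(int call, u32 __user *uargs)
{
	u32 a[6];

	if (call < 1 || call > 18)	/* SYS_SOCKET .. SYS_ACCEPT4 */
		return -EINVAL;
	if (copy_from_user(a, uargs, sizeof(a)))
		return -EFAULT;

	switch (call) {
	case 1: /* SYS_SOCKET */
		return do_socket(a[0], a[1], a[2]);		/* hypothetical */
	case 2: /* SYS_BIND */
		return do_bind(a[0], compat_ptr(a[1]), a[2]);	/* hypothetical */
	default:
		return -EINVAL;
	}
}
]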
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index ac6c281ccfe0..cfdfb3707c16 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -117,7 +117,7 @@
90 common dup2 sys_dup2
91 32 setfsuid32 sys_setfsuid
92 common fcntl sys_fcntl compat_sys_fcntl
-93 common select sys_select
+93 common select sys_select compat_sys_select
94 32 setfsgid32 sys_setfsgid
95 common fsync sys_fsync
96 common setpriority sys_setpriority
@@ -155,7 +155,7 @@
123 32 fchown sys_fchown16
123 64 fchown sys_fchown
124 common fchmod sys_fchmod
-125 common recvfrom sys_recvfrom
+125 common recvfrom sys_recvfrom compat_sys_recvfrom
126 32 setreuid sys_setreuid16
126 64 setreuid sys_setreuid
127 32 setregid sys_setregid16
@@ -247,7 +247,7 @@
204 32 readdir sys_old_readdir compat_sys_old_readdir
204 64 readdir sys_nis_syscall
205 common readahead sys_readahead compat_sys_readahead
-206 common socketcall sys_socketcall sys32_socketcall
+206 common socketcall sys_socketcall compat_sys_socketcall
207 common syslog sys_syslog
208 common lookup_dcookie sys_ni_syscall
209 common fadvise64 sys_fadvise64 compat_sys_fadvise64
@@ -461,7 +461,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ef805eaa9e01..9f1e76ddda5a 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -447,43 +447,31 @@ static int bulk_req_safe_read(
return n;
}
-/* Called without dev->lock held, and only in interrupt context. */
-static void ubd_handler(void)
+static void ubd_end_request(struct io_thread_req *io_req)
{
- int n;
- int count;
-
- while(1){
- n = bulk_req_safe_read(
- thread_fd,
- irq_req_buffer,
- &irq_remainder,
- &irq_remainder_size,
- UBD_REQ_BUFFER_SIZE
- );
- if (n < 0) {
- if(n == -EAGAIN)
- break;
- printk(KERN_ERR "spurious interrupt in ubd_handler, "
- "err = %d\n", -n);
- return;
- }
- for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
- struct io_thread_req *io_req = (*irq_req_buffer)[count];
-
- if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
- blk_queue_max_discard_sectors(io_req->req->q, 0);
- blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
- }
- blk_mq_end_request(io_req->req, io_req->error);
- kfree(io_req);
- }
+ if (io_req->error == BLK_STS_NOTSUPP) {
+ if (req_op(io_req->req) == REQ_OP_DISCARD)
+ blk_queue_disable_discard(io_req->req->q);
+ else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
+ blk_queue_disable_write_zeroes(io_req->req->q);
}
+ blk_mq_end_request(io_req->req, io_req->error);
+ kfree(io_req);
}
static irqreturn_t ubd_intr(int irq, void *dev)
{
- ubd_handler();
+ int len, i;
+
+ while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
+ &irq_remainder, &irq_remainder_size,
+ UBD_REQ_BUFFER_SIZE)) >= 0) {
+ for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
+ ubd_end_request((*irq_req_buffer)[i]);
+ }
+
+ if (len < 0 && len != -EAGAIN)
+ pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
return IRQ_HANDLED;
}
@@ -847,6 +835,7 @@ static int ubd_add(int n, char **error_out)
struct queue_limits lim = {
.max_segments = MAX_SG,
.seg_boundary_mask = PAGE_SIZE - 1,
+ .features = BLK_FEAT_WRITE_CACHE,
};
struct gendisk *disk;
int err = 0;
@@ -893,8 +882,6 @@ static int ubd_add(int n, char **error_out)
goto out_cleanup_tags;
}
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_write_cache(disk->queue, true, false);
disk->major = UBD_MAJOR;
disk->first_minor = n << UBD_SHIFT;
disk->minors = 1 << UBD_SHIFT;
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 11c9b8efdc4c..ed0a5f2dc129 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -89,10 +89,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
cld
- IBRS_ENTER
- UNTRAIN_RET
- CLEAR_BRANCH_HISTORY
-
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -116,6 +112,16 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
+ /*
+	 * CPU bug mitigation mechanisms can call other functions. They
+	 * should be invoked after making sure TF is cleared, because
+ * single-step is ignored only for instructions inside the
+ * entry_SYSENTER_compat function.
+ */
+ IBRS_ENTER
+ UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
+
movq %rsp, %rdi
call do_SYSENTER_32
jmp sysret32_from_system_call
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7fd1f57ad3d3..d6ebcab1d8b2 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -420,7 +420,7 @@
412 i386 utimensat_time64 sys_utimensat
413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 i386 io_pgetevents_time64 sys_io_pgetevents
+416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 i386 mq_timedsend_time64 sys_mq_timedsend
419 i386 mq_timedreceive_time64 sys_mq_timedreceive
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index ed2797f132ce..62cef2113ca7 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
- _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
- : [ptr] "+m" (*(_ptr)), \
- "+a" (o.low), "+d" (o.high) \
- : "b" (n.low), "c" (n.high), "S" (_ptr) \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
o.full; \
@@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
- _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
CC_SET(e) \
: CC_OUT(e) (ret), \
- [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
- : "b" (n.low), "c" (n.high), "S" (_ptr) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
if (unlikely(!ret)) \
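
The constraint rework above only changes how the inline asm forms its operands (the pointer stays in %esi for the cmpxchg8b_emu alternative, while the inline cmpxchg8b builds its memory operand via the %a modifier); callers are unaffected and keep using the generic helpers. For orientation, a hedged sketch of a typical caller through the generic API ("counter" is a hypothetical variable):

/*
 * Illustrative caller, unchanged by the constraint rework: a lockless
 * 64-bit counter update via the generic try_cmpxchg64() helper.
 */
#include <linux/atomic.h>

static u64 counter;

static void counter_add(u64 delta)
{
	u64 old = READ_ONCE(counter);

	do {
		/* 'old' is refreshed by try_cmpxchg64() on failure */
	} while (!try_cmpxchg64(&counter, &old, old + delta));
}
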
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 1dc600fa3ba5..481096177500 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -401,7 +401,6 @@ extern int __init efi_memmap_alloc(unsigned int num_entries,
struct efi_memory_map_data *data);
extern void __efi_memmap_free(u64 phys, unsigned long size,
unsigned long flags);
-#define __efi_memmap_free __efi_memmap_free
extern int __init efi_memmap_install(struct efi_memory_map_data *data);
extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..fb2809b20b0a 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#endif
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for x86 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 8 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints (see cc_stack_align4/8 in
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints (see cc_stack_align4/8 in
* arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
* low bits from any entropy chosen here.
*
- * Therefore, final stack offset entropy will be 5 (x86_64) or
- * 6 (ia32) bits.
+ * Therefore, final stack offset entropy will be 7 (x86_64) or
+ * 8 (ia32) bits.
*/
- choose_random_kstack_offset(rdtsc() & 0xFF);
+ choose_random_kstack_offset(rdtsc());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..24e3a53ca255
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n\t" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
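
A sketch of how this facility is meant to be consumed, modeled on the dentry hash table lookup that motivated it (the RUNTIME_CONST() entries added to vmlinux.lds.S later in this series collect the fixup sections); the exact dcache code may differ:

/*
 * Sketch only: read sites emit patchable immediates, and the init code
 * patches them once the real values are known, before the text goes RO.
 */
#include <asm/runtime-const.h>

static unsigned int d_hash_shift __ro_after_init;
static struct hlist_bl_head *dentry_hashtable __ro_after_init;

static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
{
	return runtime_const_ptr(dentry_hashtable) +
		runtime_const_shift_right_32(hashlen, d_hash_shift);
}

static void __init dcache_init_late(void)
{
	/* Patch the immediates in all read sites recorded in the sections */
	runtime_const_init(shift, d_hash_shift);
	runtime_const_init(ptr, dentry_hashtable);
}
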
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index e8d7d4941c4c..422a47746657 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -5,45 +5,12 @@
#include <linux/bitops.h>
#include <linux/wordpart.h>
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
struct word_at_a_time {
const unsigned long one_bits, high_bits;
};
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
-#ifdef CONFIG_64BIT
-
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
-{
- return mask*0x0001020304050608ul >> 56;
-}
-
-#else /* 32-bit case */
-
-/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-static inline long count_masked_bytes(long mask)
-{
- /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
- long a = (0x0ff0001+mask) >> 23;
- /* Fix the 1 for 00 case */
- return a & mask;
-}
-
-#endif
-
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
@@ -57,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
return bits;
}
+#ifdef CONFIG_64BIT
+
+/* Keep the initial has_zero() value for both bitmask and size calc */
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long zero_bytemask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+#define find_zero(bits) (__ffs(bits) >> 3)
+
+#else
+
+/* Create the final mask for both bytemask and size */
static inline unsigned long create_zero_mask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
@@ -66,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline unsigned long find_zero(unsigned long mask)
{
- return count_masked_bytes(mask);
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
}
+#endif
+
/*
* Load an unaligned word from kernel space.
*
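
For reference, a worked example of the zero-byte scan these helpers exist for, in the canonical strlen() shape; a sketch that assumes an aligned, padded buffer:

/*
 * Illustrative only: find the first zero byte a word at a time using
 * the helpers above (same flow as the kernel's string helpers).
 */
static size_t word_strlen(const unsigned long *p)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	size_t len = 0;
	unsigned long a, bits;

	for (;; len += sizeof(long), p++) {
		a = *p;
		if (has_zero(a, &bits, &constants)) {
			bits = prep_zero_mask(a, bits, &constants);
			bits = create_zero_mask(bits);
			return len + find_zero(bits);
		}
	}
}
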
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 2345e6836593..366f496ca3ce 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -519,7 +519,8 @@ void free_rmid(u32 closid, u32 rmid)
* allows architectures that ignore the closid parameter to avoid an
* unnecessary check.
*/
- if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ if (!resctrl_arch_mon_capable() ||
+ idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
RESCTRL_RESERVED_RMID))
return;
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e42faa792c07..52e1f3f0b361 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -27,25 +27,7 @@
unsigned long profile_pc(struct pt_regs *regs)
{
- unsigned long pc = instruction_pointer(regs);
-
- if (!user_mode(regs) && in_lock_functions(pc)) {
-#ifdef CONFIG_FRAME_POINTER
- return *(unsigned long *)(regs->bp + sizeof(long));
-#else
- unsigned long *sp = (unsigned long *)regs->sp;
- /*
- * Return address is either directly at stack pointer
- * or above a saved flags. Eflags has bits 22-31 zero,
- * kernel addresses don't.
- */
- if (sp[0] >> 22)
- return sp[0];
- if (sp[1] >> 22)
- return sp[1];
-#endif
- }
- return pc;
+ return instruction_pointer(regs);
}
EXPORT_SYMBOL(profile_pc);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3509afc6a672..6e73403e874f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -357,6 +357,9 @@ SECTIONS
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 296c524988f9..c95d3900fe56 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2843,7 +2843,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (sev_es_prevent_msr_access(vcpu, msr_info)) {
msr_info->data = 0;
- return -EINVAL;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
}
switch (msr_info->index) {
@@ -2998,7 +2998,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data;
if (sev_es_prevent_msr_access(vcpu, msr))
- return -EINVAL;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8c9e4281d978..0763a0f72a06 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10718,13 +10718,12 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
+ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
- else {
- static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
- if (ioapic_in_kernel(vcpu->kvm))
- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
- }
+ else if (ioapic_in_kernel(vcpu->kvm))
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
if (is_guest_mode(vcpu))
vcpu->arch.load_eoi_exitmap_pending = true;
diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c
index 4ef20b49eb5e..6ed1935504b9 100644
--- a/arch/x86/platform/efi/memmap.c
+++ b/arch/x86/platform/efi/memmap.c
@@ -92,12 +92,22 @@ int __init efi_memmap_alloc(unsigned int num_entries,
*/
int __init efi_memmap_install(struct efi_memory_map_data *data)
{
+ unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map;
+ unsigned long flags = efi.memmap.flags;
+ u64 phys = efi.memmap.phys_map;
+ int ret;
+
efi_memmap_unmap();
if (efi_enabled(EFI_PARAVIRT))
return 0;
- return __efi_memmap_init(data);
+ ret = __efi_memmap_init(data);
+ if (ret)
+ return ret;
+
+ __efi_memmap_free(phys, size, flags);
+ return 0;
}
/**
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 08010dbf5e09..df275d554788 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -19,7 +19,7 @@
struct task_struct;
-static inline struct task_struct *get_current(void)
+static __always_inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 326db1c1d5d8..e0dffcc43b9e 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -91,7 +91,7 @@ struct thread_info {
}
/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
+static __always_inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
__asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index defc67909a9c..d6d2b533a574 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -263,6 +263,9 @@ static const struct proc_ops simdisk_proc_ops = {
static int __init simdisk_setup(struct simdisk *dev, int which,
struct proc_dir_entry *procdir)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
char tmp[2] = { '0' + which, 0 };
int err;
@@ -271,7 +274,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
spin_lock_init(&dev->lock);
dev->users = 0;
- dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE);
+ dev->gd = blk_alloc_disk(&lim, NUMA_NO_NODE);
if (IS_ERR(dev->gd)) {
err = PTR_ERR(dev->gd);
goto out;
diff --git a/block/Kconfig b/block/Kconfig
index dc12af58dbae..5b623b876d3b 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -62,6 +62,8 @@ config BLK_DEV_BSGLIB
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
+ select CRC_T10DIF
+ select CRC64_ROCKSOFT
help
Some storage devices allow extra information to be
stored/retrieved to help protect the data. The block layer
@@ -72,12 +74,6 @@ config BLK_DEV_INTEGRITY
T10/SCSI Data Integrity Field or the T13/ATA External Path
Protection. If in doubt, say N.
-config BLK_DEV_INTEGRITY_T10
- tristate
- depends on BLK_DEV_INTEGRITY
- select CRC_T10DIF
- select CRC64_ROCKSOFT
-
config BLK_DEV_WRITE_MOUNTED
bool "Allow writing to mounted block devices"
default y
diff --git a/block/Makefile b/block/Makefile
index 168150b9c510..ddfd21c1a9ff 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -26,8 +26,7 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
diff --git a/block/bdev.c b/block/bdev.c
index 353677ac49b3..c5507b6f63b8 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -385,7 +385,7 @@ static struct file_system_type bd_type = {
};
struct super_block *blockdev_superblock __ro_after_init;
-struct vfsmount *blockdev_mnt __ro_after_init;
+static struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);
void __init bdev_cache_init(void)
@@ -1260,23 +1260,42 @@ void sync_bdevs(bool wait)
}
/*
- * Handle STATX_DIOALIGN for block devices.
- *
- * Note that the inode passed to this is the inode of a block device node file,
- * not the block device's internal inode. Therefore it is *not* valid to use
- * I_BDEV() here; the block device has to be looked up by i_rdev instead.
+ * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
*/
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+void bdev_statx(struct path *path, struct kstat *stat,
+ u32 request_mask)
{
+ struct inode *backing_inode;
struct block_device *bdev;
- bdev = blkdev_get_no_open(inode->i_rdev);
+ if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
+ return;
+
+ backing_inode = d_backing_inode(path->dentry);
+
+ /*
+ * Note that backing_inode is the inode of a block device node file,
+ * not the block device's internal inode. Therefore it is *not* valid
+ * to use I_BDEV() here; the block device has to be looked up by i_rdev
+ * instead.
+ */
+ bdev = blkdev_get_no_open(backing_inode->i_rdev);
if (!bdev)
return;
- stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
- stat->result_mask |= STATX_DIOALIGN;
+ if (request_mask & STATX_DIOALIGN) {
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ stat->dio_offset_align = bdev_logical_block_size(bdev);
+ stat->result_mask |= STATX_DIOALIGN;
+ }
+
+ if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
+ struct request_queue *bd_queue = bdev->bd_queue;
+
+ generic_fill_statx_atomic_writes(stat,
+ queue_atomic_write_unit_min_bytes(bd_queue),
+ queue_atomic_write_unit_max_bytes(bd_queue));
+ }
blkdev_put_no_open(bdev);
}
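
The userspace-visible side of bdev_statx() is the statx(2) mask handling above. A small example querying both the direct-I/O alignment and, with new enough kernel headers, the atomic write limits of a block device node; STATX_WRITE_ATOMIC and the stx_atomic_write_* fields are only present in 6.11-era uapi headers:

/* Userspace sketch: query DIO alignment and atomic write limits. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	struct statx stx;

	if (argc < 2)
		return 2;
	if (statx(AT_FDCWD, argv[1], 0,
		  STATX_DIOALIGN | STATX_WRITE_ATOMIC, &stx))
		return 1;
	if (stx.stx_mask & STATX_DIOALIGN)
		printf("dio mem/offset align: %u/%u\n",
		       stx.stx_dio_mem_align, stx.stx_dio_offset_align);
	if (stx.stx_mask & STATX_WRITE_ATOMIC)
		printf("atomic write unit min/max: %u/%u\n",
		       stx.stx_atomic_write_unit_min,
		       stx.stx_atomic_write_unit_max);
	return 0;
}
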
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d442ee358fc2..b758693697c0 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -797,57 +797,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
*/
bfq_link_bfqg(bfqd, bfqg);
__bfq_bic_change_cgroup(bfqd, bic, bfqg);
- /*
- * Update blkg_path for bfq_log_* functions. We cache this
- * path, and update it here, for the following
- * reasons. Operations on blkg objects in blk-cgroup are
- * protected with the request_queue lock, and not with the
- * lock that protects the instances of this scheduler
- * (bfqd->lock). This exposes BFQ to the following sort of
- * race.
- *
- * The blkg_lookup performed in bfq_get_queue, protected
- * through rcu, may happen to return the address of a copy of
- * the original blkg. If this is the case, then the
- * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
- * the blkg, is useless: it does not prevent blk-cgroup code
- * from destroying both the original blkg and all objects
- * directly or indirectly referred by the copy of the
- * blkg.
- *
- * On the bright side, destroy operations on a blkg invoke, as
- * a first step, hooks of the scheduler associated with the
- * blkg. And these hooks are executed with bfqd->lock held for
- * BFQ. As a consequence, for any blkg associated with the
- * request queue this instance of the scheduler is attached
- * to, we are guaranteed that such a blkg is not destroyed, and
- * that all the pointers it contains are consistent, while we
- * are holding bfqd->lock. A blkg_lookup performed with
- * bfqd->lock held then returns a fully consistent blkg, which
- * remains consistent until this lock is held.
- *
- * Thanks to the last fact, and to the fact that: (1) bfqg has
- * been obtained through a blkg_lookup in the above
- * assignment, and (2) bfqd->lock is being held, here we can
- * safely use the policy data for the involved blkg (i.e., the
- * field bfqg->pd) to get to the blkg associated with bfqg,
- * and then we can safely use any field of blkg. After we
- * release bfqd->lock, even just getting blkg through this
- * bfqg may cause dangling references to be traversed, as
- * bfqg->pd may not exist any more.
- *
- * In view of the above facts, here we cache, in the bfqg, any
- * blkg data we may need for this bic, and for its associated
- * bfq_queue. As of now, we need to cache only the path of the
- * blkg, which is used in the bfq_log_* functions.
- *
- * Finally, note that bfqg itself needs to be protected from
- * destruction on the blkg_free of the original blkg (which
- * invokes bfq_pd_free). We use an additional private
- * refcounter for bfqg, to let it disappear only after no
- * bfq_queue refers to it any longer.
- */
- blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
bic->blkcg_serial_nr = serial_nr;
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 4b88a54a9b76..36a4998c4b37 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5463,40 +5463,42 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
}
}
+static void _bfq_exit_icq(struct bfq_io_cq *bic, unsigned int num_actuators)
+{
+ struct bfq_iocq_bfqq_data *bfqq_data = bic->bfqq_data;
+ unsigned int act_idx;
+
+ for (act_idx = 0; act_idx < num_actuators; act_idx++) {
+ if (bfqq_data[act_idx].stable_merge_bfqq)
+ bfq_put_stable_ref(bfqq_data[act_idx].stable_merge_bfqq);
+
+ bfq_exit_icq_bfqq(bic, true, act_idx);
+ bfq_exit_icq_bfqq(bic, false, act_idx);
+ }
+}
+
static void bfq_exit_icq(struct io_cq *icq)
{
struct bfq_io_cq *bic = icq_to_bic(icq);
struct bfq_data *bfqd = bic_to_bfqd(bic);
unsigned long flags;
- unsigned int act_idx;
+
/*
* If bfqd and thus bfqd->num_actuators is not available any
* longer, then cycle over all possible per-actuator bfqqs in
* next loop. We rely on bic being zeroed on creation, and
* therefore on its unused per-actuator fields being NULL.
- */
- unsigned int num_actuators = BFQ_MAX_ACTUATORS;
- struct bfq_iocq_bfqq_data *bfqq_data = bic->bfqq_data;
-
- /*
+ *
* bfqd is NULL if scheduler already exited, and in that case
* this is the last time these queues are accessed.
*/
if (bfqd) {
spin_lock_irqsave(&bfqd->lock, flags);
- num_actuators = bfqd->num_actuators;
- }
-
- for (act_idx = 0; act_idx < num_actuators; act_idx++) {
- if (bfqq_data[act_idx].stable_merge_bfqq)
- bfq_put_stable_ref(bfqq_data[act_idx].stable_merge_bfqq);
-
- bfq_exit_icq_bfqq(bic, true, act_idx);
- bfq_exit_icq_bfqq(bic, false, act_idx);
- }
-
- if (bfqd)
+ _bfq_exit_icq(bic, bfqd->num_actuators);
spin_unlock_irqrestore(&bfqd->lock, flags);
+ } else {
+ _bfq_exit_icq(bic, BFQ_MAX_ACTUATORS);
+ }
}
/*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 467e8cfc41a2..08ddf2cfae5b 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -1003,9 +1003,6 @@ struct bfq_group {
/* must be the first member */
struct blkg_policy_data pd;
- /* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
- char blkg_path[128];
-
/* reference counter (see comments in bfq_bic_update_cgroup) */
refcount_t ref;
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8b528e12136f..b78c145eb026 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -76,7 +76,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
&bip->bip_max_vcnt, gfp_mask);
if (!bip->bip_vec)
goto err;
- } else {
+ } else if (nr_vecs) {
bip->bip_vec = bip->bip_inline_vecs;
}
@@ -276,6 +276,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
bip->bip_iter.bi_sector = seed;
+ bip->bip_vcnt = nr_vecs;
return 0;
free_bip:
bio_integrity_free(bio);
@@ -297,6 +298,7 @@ static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
bip->bip_flags |= BIP_INTEGRITY_USER;
bip->bip_iter.bi_sector = seed;
bip->bip_iter.bi_size = len;
+ bip->bip_vcnt = nr_vecs;
return 0;
}
@@ -334,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
u32 seed)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
+ unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
unsigned int direction, nr_bvecs;
@@ -397,44 +399,6 @@ free_bvec:
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
/**
- * bio_integrity_process - Process integrity metadata for a bio
- * @bio: bio to generate/verify integrity metadata for
- * @proc_iter: iterator to process
- * @proc_fn: Pointer to the relevant processing function
- */
-static blk_status_t bio_integrity_process(struct bio *bio,
- struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
-{
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
- struct blk_integrity_iter iter;
- struct bvec_iter bviter;
- struct bio_vec bv;
- struct bio_integrity_payload *bip = bio_integrity(bio);
- blk_status_t ret = BLK_STS_OK;
-
- iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
- iter.interval = 1 << bi->interval_exp;
- iter.tuple_size = bi->tuple_size;
- iter.seed = proc_iter->bi_sector;
- iter.prot_buf = bvec_virt(bip->bip_vec);
- iter.pi_offset = bi->pi_offset;
-
- __bio_for_each_segment(bv, bio, bviter, *proc_iter) {
- void *kaddr = bvec_kmap_local(&bv);
-
- iter.data_buf = kaddr;
- iter.data_size = bv.bv_len;
- ret = proc_fn(&iter);
- kunmap_local(kaddr);
-
- if (ret)
- break;
-
- }
- return ret;
-}
-
-/**
* bio_integrity_prep - Prepare bio for integrity I/O
* @bio: bio to prepare
*
@@ -450,17 +414,13 @@ bool bio_integrity_prep(struct bio *bio)
{
struct bio_integrity_payload *bip;
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned int len;
void *buf;
- unsigned long start, end;
- unsigned int len, nr_pages;
- unsigned int bytes, offset, i;
+ gfp_t gfp = GFP_NOIO;
if (!bi)
return true;
- if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
- return true;
-
if (!bio_sectors(bio))
return true;
@@ -468,32 +428,36 @@ bool bio_integrity_prep(struct bio *bio)
if (bio_integrity(bio))
return true;
- if (bio_data_dir(bio) == READ) {
- if (!bi->profile->verify_fn ||
- !(bi->flags & BLK_INTEGRITY_VERIFY))
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ if (bi->flags & BLK_INTEGRITY_NOVERIFY)
return true;
- } else {
- if (!bi->profile->generate_fn ||
- !(bi->flags & BLK_INTEGRITY_GENERATE))
+ break;
+ case REQ_OP_WRITE:
+ if (bi->flags & BLK_INTEGRITY_NOGENERATE)
return true;
+
+ /*
+		 * Zero the allocated memory so that we do not leak
+		 * uninitialized kernel memory to disk for non-integrity
+		 * metadata, where nothing else initializes the buffer.
+ */
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+ gfp |= __GFP_ZERO;
+ break;
+ default:
+ return true;
}
/* Allocate kernel buffer for protection data */
len = bio_integrity_bytes(bi, bio_sectors(bio));
- buf = kmalloc(len, GFP_NOIO);
+ buf = kmalloc(len, gfp);
if (unlikely(buf == NULL)) {
- printk(KERN_ERR "could not allocate integrity buffer\n");
goto err_end_io;
}
- end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- start = ((unsigned long) buf) >> PAGE_SHIFT;
- nr_pages = end - start;
-
- /* Allocate bio integrity payload and integrity vectors */
- bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
+ bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
if (IS_ERR(bip)) {
- printk(KERN_ERR "could not allocate data integrity bioset\n");
kfree(buf);
goto err_end_io;
}
@@ -501,35 +465,20 @@ bool bio_integrity_prep(struct bio *bio)
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
bip_set_seed(bip, bio->bi_iter.bi_sector);
- if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
bip->bip_flags |= BIP_IP_CHECKSUM;
- /* Map it */
- offset = offset_in_page(buf);
- for (i = 0; i < nr_pages && len > 0; i++) {
- bytes = PAGE_SIZE - offset;
-
- if (bytes > len)
- bytes = len;
-
- if (bio_integrity_add_page(bio, virt_to_page(buf),
- bytes, offset) < bytes) {
- printk(KERN_ERR "could not attach integrity payload\n");
- goto err_end_io;
- }
-
- buf += bytes;
- len -= bytes;
- offset = 0;
+ if (bio_integrity_add_page(bio, virt_to_page(buf), len,
+ offset_in_page(buf)) < len) {
+ printk(KERN_ERR "could not attach integrity payload\n");
+ goto err_end_io;
}
/* Auto-generate integrity metadata if this is a write */
- if (bio_data_dir(bio) == WRITE) {
- bio_integrity_process(bio, &bio->bi_iter,
- bi->profile->generate_fn);
- } else {
+ if (bio_data_dir(bio) == WRITE)
+ blk_integrity_generate(bio);
+ else
bip->bio_iter = bio->bi_iter;
- }
return true;
err_end_io:
@@ -552,15 +501,8 @@ static void bio_integrity_verify_fn(struct work_struct *work)
struct bio_integrity_payload *bip =
container_of(work, struct bio_integrity_payload, bip_work);
struct bio *bio = bip->bip_bio;
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
- /*
- * At the moment verify is called bio's iterator was advanced
- * during split and completion, we need to rewind iterator to
- * it's original position.
- */
- bio->bi_status = bio_integrity_process(bio, &bip->bio_iter,
- bi->profile->verify_fn);
+ blk_integrity_verify(bio);
bio_integrity_free(bio);
bio_endio(bio);
}
@@ -582,7 +524,7 @@ bool __bio_integrity_endio(struct bio *bio)
struct bio_integrity_payload *bip = bio_integrity(bio);
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
- (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
+ (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->csum_type) {
INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bip->bip_work);
return false;
@@ -642,14 +584,11 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
BUG_ON(bip_src == NULL);
- bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
+ bip = bio_integrity_alloc(bio, gfp_mask, 0);
if (IS_ERR(bip))
return PTR_ERR(bip);
- memcpy(bip->bip_vec, bip_src->bip_vec,
- bip_src->bip_vcnt * sizeof(struct bio_vec));
-
- bip->bip_vcnt = bip_src->bip_vcnt;
+ bip->bip_vec = bip_src->bip_vec;
bip->bip_iter = bip_src->bip_iter;
bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
diff --git a/block/bio.c b/block/bio.c
index e9e809a63c59..a3b1b2266c50 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -953,7 +953,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
bool *same_page)
{
unsigned long mask = queue_segment_boundary(q);
- phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
+ phys_addr_t addr1 = bvec_phys(bv);
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
if ((addr1 | mask) != (addr2 | mask))
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 90b3959d88cf..bd472a30bc61 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -301,19 +301,6 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
}
/**
- * blkg_path - format cgroup path of blkg
- * @blkg: blkg of interest
- * @buf: target buffer
- * @buflen: target buffer length
- *
- * Format the path of the cgroup of @blkg into @buf.
- */
-static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
-{
- return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-}
-
-/**
* blkg_get - get a blkg reference
* @blkg: blkg to get
*
diff --git a/block/blk-core.c b/block/blk-core.c
index 82c3ae22d76d..02bceeb36f2c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -94,20 +94,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
}
EXPORT_SYMBOL(blk_queue_flag_clear);
-/**
- * blk_queue_flag_test_and_set - atomically test and set a queue flag
- * @flag: flag to be set
- * @q: request queue
- *
- * Returns the previous value of @flag - 0 if the flag was not set and 1 if
- * the flag was already set.
- */
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
-{
- return test_and_set_bit(flag, &q->queue_flags);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
-
#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
static const char *const blk_op_name[] = {
REQ_OP_NAME(READ),
@@ -174,6 +160,8 @@ static const struct {
/* Command duration limit device-side timeout */
[BLK_STS_DURATION_LIMIT] = { -ETIME, "duration limit exceeded" },
+ [BLK_STS_INVAL] = { -EINVAL, "invalid" },
+
/* everything else not covered above: */
[BLK_STS_IOERR] = { -EIO, "I/O" },
};
@@ -739,6 +727,18 @@ void submit_bio_noacct_nocheck(struct bio *bio)
__submit_bio_noacct(bio);
}
+static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
+ struct bio *bio)
+{
+ if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
+ return BLK_STS_INVAL;
+
+ if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
+ return BLK_STS_INVAL;
+
+ return BLK_STS_OK;
+}
+
/**
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
@@ -782,7 +782,7 @@ void submit_bio_noacct(struct bio *bio)
if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE &&
bio_op(bio) != REQ_OP_ZONE_APPEND))
goto end_io;
- if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+ if (!bdev_write_cache(bdev)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!bio_sectors(bio)) {
status = BLK_STS_OK;
@@ -791,12 +791,17 @@ void submit_bio_noacct(struct bio *bio)
}
}
- if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ if (!(q->limits.features & BLK_FEAT_POLL))
bio_clear_polled(bio);
switch (bio_op(bio)) {
case REQ_OP_READ:
case REQ_OP_WRITE:
+ if (bio->bi_opf & REQ_ATOMIC) {
+ status = blk_validate_atomic_write_op_size(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ }
break;
case REQ_OP_FLUSH:
/*
@@ -825,11 +830,8 @@ void submit_bio_noacct(struct bio *bio)
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
- if (!bdev_is_zoned(bio->bi_bdev))
- goto not_supported;
- break;
case REQ_OP_ZONE_RESET_ALL:
- if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+ if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
case REQ_OP_DRV_IN:
@@ -915,8 +917,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
return 0;
q = bdev_get_queue(bdev);
- if (cookie == BLK_QC_T_NONE ||
- !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
return 0;
blk_flush_plug(current->plug, false);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index cca4f9131f79..a72e2a83d075 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -100,23 +100,6 @@ blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
}
-static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
-{
- unsigned int policy = 0;
-
- if (blk_rq_sectors(rq))
- policy |= REQ_FSEQ_DATA;
-
- if (fflags & (1UL << QUEUE_FLAG_WC)) {
- if (rq->cmd_flags & REQ_PREFLUSH)
- policy |= REQ_FSEQ_PREFLUSH;
- if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
- (rq->cmd_flags & REQ_FUA))
- policy |= REQ_FSEQ_POSTFLUSH;
- }
- return policy;
-}
-
static unsigned int blk_flush_cur_seq(struct request *rq)
{
return 1 << ffz(rq->flush.seq);
@@ -399,19 +382,32 @@ static void blk_rq_init_flush(struct request *rq)
bool blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
- unsigned long fflags = q->queue_flags; /* may change, cache */
- unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
+ bool supports_fua = q->limits.features & BLK_FEAT_FUA;
+ unsigned int policy = 0;
/* FLUSH/FUA request must never be merged */
WARN_ON_ONCE(rq->bio != rq->biotail);
+ if (blk_rq_sectors(rq))
+ policy |= REQ_FSEQ_DATA;
+
+ /*
+ * Check which flushes we need to sequence for this operation.
+ */
+ if (blk_queue_write_cache(q)) {
+ if (rq->cmd_flags & REQ_PREFLUSH)
+ policy |= REQ_FSEQ_PREFLUSH;
+ if ((rq->cmd_flags & REQ_FUA) && !supports_fua)
+ policy |= REQ_FSEQ_POSTFLUSH;
+ }
+
/*
* @policy now records what operations need to be done. Adjust
* REQ_PREFLUSH and FUA for the driver.
*/
rq->cmd_flags &= ~REQ_PREFLUSH;
- if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
+ if (!supports_fua)
rq->cmd_flags &= ~REQ_FUA;
/*
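
For the submitter's view of the policy computed above: a data write tagged REQ_PREFLUSH | REQ_FUA is decomposed by blk_insert_flush() into PREFLUSH, DATA and (when the device lacks BLK_FEAT_FUA) POSTFLUSH steps. A hedged sketch of the two common call shapes, a commit-block write and a bare cache flush:

/*
 * Illustrative only: PREFLUSH+FUA data write and an empty flush.
 * On devices without BLK_FEAT_FUA the REQ_FUA below is emulated by a
 * POSTFLUSH step sequenced in blk_insert_flush().
 */
static int write_commit_block(struct block_device *bdev,
			      struct page *page, sector_t sector)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(bdev, 1,
			REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA, GFP_NOFS);
	bio->bi_iter.bi_sector = sector;
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	ret = submit_bio_wait(bio);
	bio_put(bio);
	return ret;
}

static int flush_cache_only(struct block_device *bdev)
{
	return blkdev_issue_flush(bdev);	/* empty PREFLUSH bio */
}
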
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index ccbeb6dfa87a..010decc892ea 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -107,60 +107,6 @@ new_segment:
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
-/**
- * blk_integrity_compare - Compare integrity profile of two disks
- * @gd1: Disk to compare
- * @gd2: Disk to compare
- *
- * Description: Meta-devices like DM and MD need to verify that all
- * sub-devices use the same integrity format before advertising to
- * upper layers that they can send/receive integrity metadata. This
- * function can be used to check whether two gendisk devices have
- * compatible integrity formats.
- */
-int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
-{
- struct blk_integrity *b1 = &gd1->queue->integrity;
- struct blk_integrity *b2 = &gd2->queue->integrity;
-
- if (!b1->profile && !b2->profile)
- return 0;
-
- if (!b1->profile || !b2->profile)
- return -1;
-
- if (b1->interval_exp != b2->interval_exp) {
- pr_err("%s: %s/%s protection interval %u != %u\n",
- __func__, gd1->disk_name, gd2->disk_name,
- 1 << b1->interval_exp, 1 << b2->interval_exp);
- return -1;
- }
-
- if (b1->tuple_size != b2->tuple_size) {
- pr_err("%s: %s/%s tuple sz %u != %u\n", __func__,
- gd1->disk_name, gd2->disk_name,
- b1->tuple_size, b2->tuple_size);
- return -1;
- }
-
- if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
- pr_err("%s: %s/%s tag sz %u != %u\n", __func__,
- gd1->disk_name, gd2->disk_name,
- b1->tag_size, b2->tag_size);
- return -1;
- }
-
- if (b1->profile != b2->profile) {
- pr_err("%s: %s/%s type %s != %s\n", __func__,
- gd1->disk_name, gd2->disk_name,
- b1->profile->name, b2->profile->name);
- return -1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(blk_integrity_compare);
-
bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
struct request *next)
{
@@ -214,7 +160,64 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
static inline struct blk_integrity *dev_to_bi(struct device *dev)
{
- return &dev_to_disk(dev)->queue->integrity;
+ return &dev_to_disk(dev)->queue->limits.integrity;
+}
+
+const char *blk_integrity_profile_name(struct blk_integrity *bi)
+{
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_IP:
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ return "T10-DIF-TYPE1-IP";
+ return "T10-DIF-TYPE3-IP";
+ case BLK_INTEGRITY_CSUM_CRC:
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ return "T10-DIF-TYPE1-CRC";
+ return "T10-DIF-TYPE3-CRC";
+ case BLK_INTEGRITY_CSUM_CRC64:
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ return "EXT-DIF-TYPE1-CRC64";
+ return "EXT-DIF-TYPE3-CRC64";
+ case BLK_INTEGRITY_CSUM_NONE:
+ break;
+ }
+
+ return "nop";
+}
+EXPORT_SYMBOL_GPL(blk_integrity_profile_name);
+
+static ssize_t flag_store(struct device *dev, const char *page, size_t count,
+ unsigned char flag)
+{
+ struct request_queue *q = dev_to_disk(dev)->queue;
+ struct queue_limits lim;
+ unsigned long val;
+ int err;
+
+ err = kstrtoul(page, 10, &val);
+ if (err)
+ return err;
+
+ /* note that the flags are inverted vs the values in the sysfs files */
+ lim = queue_limits_start_update(q);
+ if (val)
+ lim.integrity.flags &= ~flag;
+ else
+ lim.integrity.flags |= flag;
+
+ blk_mq_freeze_queue(q);
+ err = queue_limits_commit_update(q, &lim);
+ blk_mq_unfreeze_queue(q);
+ if (err)
+ return err;
+ return count;
+}
+
+static ssize_t flag_show(struct device *dev, char *page, unsigned char flag)
+{
+ struct blk_integrity *bi = dev_to_bi(dev);
+
+ return sysfs_emit(page, "%d\n", !(bi->flags & flag));
}
static ssize_t format_show(struct device *dev, struct device_attribute *attr,
@@ -222,9 +225,9 @@ static ssize_t format_show(struct device *dev, struct device_attribute *attr,
{
struct blk_integrity *bi = dev_to_bi(dev);
- if (bi->profile && bi->profile->name)
- return sysfs_emit(page, "%s\n", bi->profile->name);
- return sysfs_emit(page, "none\n");
+ if (!bi->tuple_size)
+ return sysfs_emit(page, "none\n");
+ return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi));
}
static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr,
@@ -249,49 +252,26 @@ static ssize_t read_verify_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t count)
{
- struct blk_integrity *bi = dev_to_bi(dev);
- char *p = (char *) page;
- unsigned long val = simple_strtoul(p, &p, 10);
-
- if (val)
- bi->flags |= BLK_INTEGRITY_VERIFY;
- else
- bi->flags &= ~BLK_INTEGRITY_VERIFY;
-
- return count;
+ return flag_store(dev, page, count, BLK_INTEGRITY_NOVERIFY);
}
static ssize_t read_verify_show(struct device *dev,
struct device_attribute *attr, char *page)
{
- struct blk_integrity *bi = dev_to_bi(dev);
-
- return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_VERIFY));
+ return flag_show(dev, page, BLK_INTEGRITY_NOVERIFY);
}
static ssize_t write_generate_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t count)
{
- struct blk_integrity *bi = dev_to_bi(dev);
-
- char *p = (char *) page;
- unsigned long val = simple_strtoul(p, &p, 10);
-
- if (val)
- bi->flags |= BLK_INTEGRITY_GENERATE;
- else
- bi->flags &= ~BLK_INTEGRITY_GENERATE;
-
- return count;
+ return flag_store(dev, page, count, BLK_INTEGRITY_NOGENERATE);
}
static ssize_t write_generate_show(struct device *dev,
struct device_attribute *attr, char *page)
{
- struct blk_integrity *bi = dev_to_bi(dev);
-
- return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_GENERATE));
+ return flag_show(dev, page, BLK_INTEGRITY_NOGENERATE);
}
static ssize_t device_is_integrity_capable_show(struct device *dev,
@@ -325,81 +305,3 @@ const struct attribute_group blk_integrity_attr_group = {
.name = "integrity",
.attrs = integrity_attrs,
};
-
-static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
-{
- return BLK_STS_OK;
-}
-
-static void blk_integrity_nop_prepare(struct request *rq)
-{
-}
-
-static void blk_integrity_nop_complete(struct request *rq,
- unsigned int nr_bytes)
-{
-}
-
-static const struct blk_integrity_profile nop_profile = {
- .name = "nop",
- .generate_fn = blk_integrity_nop_fn,
- .verify_fn = blk_integrity_nop_fn,
- .prepare_fn = blk_integrity_nop_prepare,
- .complete_fn = blk_integrity_nop_complete,
-};
-
-/**
- * blk_integrity_register - Register a gendisk as being integrity-capable
- * @disk: struct gendisk pointer to make integrity-aware
- * @template: block integrity profile to register
- *
- * Description: When a device needs to advertise itself as being able to
- * send/receive integrity metadata it must use this function to register
- * the capability with the block layer. The template is a blk_integrity
- * struct with values appropriate for the underlying hardware. See
- * Documentation/block/data-integrity.rst.
- */
-void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
-{
- struct blk_integrity *bi = &disk->queue->integrity;
-
- bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE |
- template->flags;
- bi->interval_exp = template->interval_exp ? :
- ilog2(queue_logical_block_size(disk->queue));
- bi->profile = template->profile ? template->profile : &nop_profile;
- bi->tuple_size = template->tuple_size;
- bi->tag_size = template->tag_size;
- bi->pi_offset = template->pi_offset;
-
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
-
-#ifdef CONFIG_BLK_INLINE_ENCRYPTION
- if (disk->queue->crypto_profile) {
- pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
- disk->queue->crypto_profile = NULL;
- }
-#endif
-}
-EXPORT_SYMBOL(blk_integrity_register);
-
-/**
- * blk_integrity_unregister - Unregister block integrity profile
- * @disk: disk whose integrity profile to unregister
- *
- * Description: This function unregisters the integrity capability from
- * a block device.
- */
-void blk_integrity_unregister(struct gendisk *disk)
-{
- struct blk_integrity *bi = &disk->queue->integrity;
-
- if (!bi->profile)
- return;
-
- /* ensure all bios are off the integrity workqueue */
- blk_flush_integrity();
- blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue);
- memset(bi, 0, sizeof(*bi));
-}
-EXPORT_SYMBOL(blk_integrity_unregister);
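
With blk_integrity_register()/unregister() gone, a driver now describes its protection information directly in queue_limits.integrity before allocating the disk (or via queue_limits_commit_update()). A sketch assuming the field names visible in this diff; a real driver would fill these from hardware capabilities:

/*
 * Sketch only: the queue_limits-based replacement for
 * blk_integrity_register(). T10-DIF Type 1 with a CRC guard tag.
 */
static void mydev_set_pi(struct queue_limits *lim, unsigned int lbs_shift)
{
	struct blk_integrity *bi = &lim->integrity;

	bi->csum_type	 = BLK_INTEGRITY_CSUM_CRC;	/* T10-DIF CRC guard */
	bi->flags	|= BLK_INTEGRITY_REF_TAG;	/* Type 1: check ref tag */
	bi->tuple_size	 = 8;	/* sizeof(struct t10_pi_tuple) */
	bi->interval_exp = lbs_shift;	/* one tuple per logical block */
}
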
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 442da9dad042..9f735efa6c94 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -103,38 +103,71 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
-static int __blkdev_issue_write_zeroes(struct block_device *bdev,
- sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop, unsigned flags)
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
{
- struct bio *bio = *biop;
- unsigned int max_sectors;
-
- if (bdev_read_only(bdev))
- return -EPERM;
-
- /* Ensure that max_sectors doesn't overflow bi_size */
- max_sectors = bdev_write_zeroes_sectors(bdev);
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if (max_sectors == 0)
- return -EOPNOTSUPP;
+ return min(bdev_write_zeroes_sectors(bdev),
+ (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned flags)
+{
while (nr_sects) {
- unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+ unsigned int len = min_t(sector_t, nr_sects,
+ bio_write_zeroes_limit(bdev));
+ struct bio *bio;
+
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
- bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
+ bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+ *biop = bio_chain_and_submit(*biop, bio);
+
nr_sects -= len;
sector += len;
cond_resched();
}
+}
- *biop = bio;
- return 0;
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ /*
+ * For some devices there is no non-destructive way to verify whether
+ * WRITE ZEROES is actually supported. These will clear the capability
+ * on an I/O error, in which case we'll turn any error into
+ * "not supported" here.
+ */
+ if (ret && !bdev_write_zeroes_sectors(bdev))
+ return -EOPNOTSUPP;
+ return ret;
}
/*
@@ -150,35 +183,63 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
return min(pages, (sector_t)BIO_MAX_VECS);
}
-static int __blkdev_issue_zero_pages(struct block_device *bdev,
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop)
+ struct bio **biop, unsigned int flags)
{
- struct bio *bio = *biop;
- int bi_size = 0;
- unsigned int sz;
-
- if (bdev_read_only(bdev))
- return -EPERM;
+ while (nr_sects) {
+ unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+ struct bio *bio;
- while (nr_sects != 0) {
- bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects),
- REQ_OP_WRITE, gfp_mask);
+ bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
bio->bi_iter.bi_sector = sector;
- while (nr_sects != 0) {
- sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
- bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
- nr_sects -= bi_size >> 9;
- sector += bi_size >> 9;
- if (bi_size < sz)
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
+
+ do {
+ unsigned int len, added;
+
+ len = min_t(sector_t,
+ PAGE_SIZE, nr_sects << SECTOR_SHIFT);
+ added = bio_add_page(bio, ZERO_PAGE(0), len, 0);
+ if (added < len)
break;
- }
+ nr_sects -= added >> SECTOR_SHIFT;
+ sector += added >> SECTOR_SHIFT;
+ } while (nr_sects);
+
+ *biop = bio_chain_and_submit(*biop, bio);
cond_resched();
}
+}
- *biop = bio;
- return 0;
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
}
/**
@@ -204,20 +265,19 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
unsigned flags)
{
- int ret;
- sector_t bs_mask;
-
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
-
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
- biop, flags);
- if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
- return ret;
+ if (bdev_read_only(bdev))
+ return -EPERM;
- return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
- biop);
+ if (bdev_write_zeroes_sectors(bdev)) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, biop, flags);
+ } else {
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+ biop, flags);
+ }
+ return 0;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
@@ -237,51 +297,21 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
- int ret = 0;
- sector_t bs_mask;
- struct bio *bio;
- struct blk_plug plug;
- bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
+ int ret;
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
+ if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
-retry:
- bio = NULL;
- blk_start_plug(&plug);
- if (try_write_zeroes) {
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
- gfp_mask, &bio, flags);
- } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
- ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
- gfp_mask, &bio);
- } else {
- /* No zeroing offload support */
- ret = -EOPNOTSUPP;
- }
- if (ret == 0 && bio) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
- }
- blk_finish_plug(&plug);
- if (ret && try_write_zeroes) {
- if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
- try_write_zeroes = false;
- goto retry;
- }
- if (!bdev_write_zeroes_sectors(bdev)) {
- /*
- * Zeroing offload support was indicated, but the
- * device reported ILLEGAL REQUEST (for some devices
- * there is no non-destructive way to verify whether
- * WRITE ZEROES is actually supported).
- */
- ret = -EOPNOTSUPP;
- }
+ if (bdev_write_zeroes_sectors(bdev)) {
+ ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, flags);
+ if (ret != -EOPNOTSUPP)
+ return ret;
}
- return ret;
+ return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
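
The rewritten zeroout path above is what the BLKZEROOUT ioctl ends up calling; a minimal userspace example (offset and length are in bytes and must be logical-block aligned, matching the -EINVAL check above):

/* Userspace sketch: zero a byte range of a block device. */
#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <sys/ioctl.h>

int zero_range(int fd, uint64_t offset, uint64_t len)
{
	uint64_t range[2] = { offset, len };

	return ioctl(fd, BLKZEROOUT, &range);
}
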
diff --git a/block/blk-map.c b/block/blk-map.c
index 71210cdb3442..bce144091128 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -634,7 +634,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
const struct iov_iter *iter, gfp_t gfp_mask)
{
bool copy = false, map_bvec = false;
- unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
+ unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
struct bio *bio = NULL;
struct iov_iter i;
int ret = -EINVAL;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8534c35e0497..de5281bcadc5 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -154,6 +154,19 @@ static struct bio *bio_split_write_zeroes(struct bio *bio,
return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
}
+static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
+ bool is_atomic)
+{
+ /*
+	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors
+	 * if both are non-zero.
+ */
+ if (is_atomic && lim->atomic_write_boundary_sectors)
+ return lim->atomic_write_boundary_sectors;
+
+ return lim->chunk_sectors;
+}
+
/*
* Return the maximum number of sectors from the start of a bio that may be
* submitted as a single request to a block device. If enough sectors remain,
@@ -167,12 +180,23 @@ static inline unsigned get_max_io_size(struct bio *bio,
{
unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
- unsigned max_sectors = lim->max_sectors, start, end;
+ bool is_atomic = bio->bi_opf & REQ_ATOMIC;
+ unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
+ unsigned max_sectors, start, end;
- if (lim->chunk_sectors) {
+ /*
+	 * We ignore lim->max_sectors for atomic writes because it may be
+	 * less than the actual bio size, which we cannot tolerate.
+ */
+ if (is_atomic)
+ max_sectors = lim->atomic_write_max_sectors;
+ else
+ max_sectors = lim->max_sectors;
+
+ if (boundary_sectors) {
max_sectors = min(max_sectors,
- blk_chunk_sectors_left(bio->bi_iter.bi_sector,
- lim->chunk_sectors));
+ blk_boundary_sectors_left(bio->bi_iter.bi_sector,
+ boundary_sectors));
}
start = bio->bi_iter.bi_sector & (pbs - 1);
@@ -185,23 +209,22 @@ static inline unsigned get_max_io_size(struct bio *bio,
/**
* get_max_segment_size() - maximum number of bytes to add as a single segment
* @lim: Request queue limits.
- * @start_page: See below.
- * @offset: Offset from @start_page where to add a segment.
+ * @paddr: address of the range to add
+ * @len: maximum length available to add at @paddr
*
- * Returns the maximum number of bytes that can be added as a single segment.
+ * Returns the maximum number of bytes of the range starting at @paddr that can
+ * be added to a single segment.
*/
static inline unsigned get_max_segment_size(const struct queue_limits *lim,
- struct page *start_page, unsigned long offset)
+ phys_addr_t paddr, unsigned int len)
{
- unsigned long mask = lim->seg_boundary_mask;
-
- offset = mask & (page_to_phys(start_page) + offset);
-
/*
* Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
* after having calculated the minimum.
*/
- return min(mask - offset, (unsigned long)lim->max_segment_size - 1) + 1;
+ return min_t(unsigned long, len,
+ min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
+ (unsigned long)lim->max_segment_size - 1) + 1);
}
/**
@@ -234,9 +257,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
unsigned seg_size = 0;
while (len && *nsegs < max_segs) {
- seg_size = get_max_segment_size(lim, bv->bv_page,
- bv->bv_offset + total_len);
- seg_size = min(seg_size, len);
+ seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
(*nsegs)++;
total_len += seg_size;
@@ -305,6 +326,11 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
*segs = nsegs;
return NULL;
split:
+ if (bio->bi_opf & REQ_ATOMIC) {
+ bio->bi_status = BLK_STS_INVAL;
+ bio_endio(bio);
+ return ERR_PTR(-EINVAL);
+ }
/*
* We can't sanely support splitting for a REQ_NOWAIT bio. End it
* with EAGAIN if splitting is required and return an error pointer.
@@ -465,8 +491,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
while (nbytes > 0) {
unsigned offset = bvec->bv_offset + total;
- unsigned len = min(get_max_segment_size(&q->limits,
- bvec->bv_page, offset), nbytes);
+ unsigned len = get_max_segment_size(&q->limits,
+ bvec_phys(bvec) + total, nbytes);
struct page *page = bvec->bv_page;
/*
@@ -588,18 +614,22 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
sector_t offset)
{
struct request_queue *q = rq->q;
- unsigned int max_sectors;
+ struct queue_limits *lim = &q->limits;
+ unsigned int max_sectors, boundary_sectors;
+ bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
if (blk_rq_is_passthrough(rq))
return q->limits.max_hw_sectors;
- max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
- if (!q->limits.chunk_sectors ||
+ boundary_sectors = blk_boundary_sectors(lim, is_atomic);
+ max_sectors = blk_queue_get_max_sectors(rq);
+
+ if (!boundary_sectors ||
req_op(rq) == REQ_OP_DISCARD ||
req_op(rq) == REQ_OP_SECURE_ERASE)
return max_sectors;
return min(max_sectors,
- blk_chunk_sectors_left(offset, q->limits.chunk_sectors));
+ blk_boundary_sectors_left(offset, boundary_sectors));
}
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
@@ -797,6 +827,18 @@ static enum elv_merge blk_try_req_merge(struct request *req,
return ELEVATOR_NO_MERGE;
}
+static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
+ struct bio *bio)
+{
+ return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
+}
+
+static bool blk_atomic_write_mergeable_rqs(struct request *rq,
+ struct request *next)
+{
+ return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
+}
+
/*
* For non-mq, this has to be called with the request spinlock acquired.
* For mq with scheduling, the appropriate queue wide lock should be held.
@@ -820,6 +862,9 @@ static struct request *attempt_merge(struct request_queue *q,
if (req->ioprio != next->ioprio)
return NULL;
+ if (!blk_atomic_write_mergeable_rqs(req, next))
+ return NULL;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -951,6 +996,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (rq->ioprio != bio_prio(bio))
return false;
+ if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
+ return false;
+
return true;
}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 770c0c2b72fa..344f9e503bdb 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -84,28 +84,15 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(NOMERGES),
QUEUE_FLAG_NAME(SAME_COMP),
QUEUE_FLAG_NAME(FAIL_IO),
- QUEUE_FLAG_NAME(NONROT),
- QUEUE_FLAG_NAME(IO_STAT),
QUEUE_FLAG_NAME(NOXMERGES),
- QUEUE_FLAG_NAME(ADD_RANDOM),
- QUEUE_FLAG_NAME(SYNCHRONOUS),
QUEUE_FLAG_NAME(SAME_FORCE),
QUEUE_FLAG_NAME(INIT_DONE),
- QUEUE_FLAG_NAME(STABLE_WRITES),
- QUEUE_FLAG_NAME(POLL),
- QUEUE_FLAG_NAME(WC),
- QUEUE_FLAG_NAME(FUA),
- QUEUE_FLAG_NAME(DAX),
QUEUE_FLAG_NAME(STATS),
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(QUIESCED),
- QUEUE_FLAG_NAME(PCI_P2PDMA),
- QUEUE_FLAG_NAME(ZONE_RESETALL),
QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
QUEUE_FLAG_NAME(HCTX_ACTIVE),
- QUEUE_FLAG_NAME(NOWAIT),
QUEUE_FLAG_NAME(SQ_SCHED),
- QUEUE_FLAG_NAME(SKIP_TAGSET_QUIESCE),
};
#undef QUEUE_FLAG_NAME
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3b4df8e5ac9e..e3c3c0c21b55 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -448,6 +448,10 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
+retry:
+ data->ctx = blk_mq_get_ctx(q);
+ data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
+
if (q->elevator) {
/*
* All requests use scheduler tags when an I/O scheduler is
@@ -469,13 +473,9 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
if (ops->limit_depth)
ops->limit_depth(data->cmd_flags, data);
}
- }
-
-retry:
- data->ctx = blk_mq_get_ctx(q);
- data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!(data->rq_flags & RQF_SCHED_TAGS))
+ } else {
blk_mq_tag_busy(data->hctx);
+ }
if (data->flags & BLK_MQ_REQ_RESERVED)
data->rq_flags |= RQF_RESV;
@@ -804,10 +804,8 @@ static void blk_complete_request(struct request *req)
if (!bio)
return;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ)
- req->q->integrity.profile->complete_fn(req, total_bytes);
-#endif
+ blk_integrity_complete(req, total_bytes);
/*
* Upper layers may call blk_crypto_evict_key() anytime after the last
@@ -875,11 +873,9 @@ bool blk_update_request(struct request *req, blk_status_t error,
if (!req->bio)
return false;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
error == BLK_STS_OK)
- req->q->integrity.profile->complete_fn(req, nr_bytes);
-#endif
+ blk_integrity_complete(req, nr_bytes);
/*
* Upper layers may call blk_crypto_evict_key() anytime after the last
@@ -1264,10 +1260,9 @@ void blk_mq_start_request(struct request *rq)
WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
rq->mq_hctx->tags->rqs[rq->tag] = rq;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
- q->integrity.profile->prepare_fn(rq);
-#endif
+ blk_integrity_prepare(rq);
+
if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num);
}
@@ -2914,6 +2909,17 @@ static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
INIT_LIST_HEAD(&rq->queuelist);
}
+static bool bio_unaligned(const struct bio *bio, struct request_queue *q)
+{
+ unsigned int bs_mask = queue_logical_block_size(q) - 1;
+
+ /* .bi_sector of any zero-sized bio needs to be initialized */
+ if ((bio->bi_iter.bi_size & bs_mask) ||
+ ((bio->bi_iter.bi_sector << SECTOR_SHIFT) & bs_mask))
+ return true;
+ return false;
+}
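
The mask trick works because the logical block size is always a power of two; a runnable userspace sketch with illustrative values:

#include <stdio.h>

#define SECTOR_SHIFT 9

/* mirrors bio_unaligned(): both the size and the byte offset must be
 * logical-block aligned */
static int unaligned(unsigned int lbs, unsigned int size,
		     unsigned long long sector)
{
	unsigned int bs_mask = lbs - 1;

	return (size & bs_mask) || ((sector << SECTOR_SHIFT) & bs_mask);
}

int main(void)
{
	/* 4096-byte logical blocks: sector 1 (byte 512) is misaligned,
	 * sector 8 (byte 4096) is aligned */
	printf("%d\n", unaligned(4096, 4096, 1)); /* prints 1 */
	printf("%d\n", unaligned(4096, 4096, 8)); /* prints 0 */
	return 0;
}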
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
@@ -2966,6 +2972,15 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
+ /*
+ * Device reconfiguration may change the logical block size, so the
+ * alignment check has to be done with the queue usage counter held
+ */
+ if (unlikely(bio_unaligned(bio, q))) {
+ bio_io_error(bio);
+ goto queue_exit;
+ }
+
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
@@ -3041,7 +3056,7 @@ queue_exit:
blk_status_t blk_insert_cloned_request(struct request *rq)
{
struct request_queue *q = rq->q;
- unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+ unsigned int max_sectors = blk_queue_get_max_sectors(rq);
unsigned int max_segments = blk_rq_get_max_segments(rq);
blk_status_t ret;
@@ -4114,6 +4129,12 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q);
}
+static bool blk_mq_can_poll(struct blk_mq_tag_set *set)
+{
+ return set->nr_maps > HCTX_TYPE_POLL &&
+ set->map[HCTX_TYPE_POLL].nr_queues;
+}
+
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
struct queue_limits *lim, void *queuedata)
{
@@ -4121,7 +4142,13 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
struct request_queue *q;
int ret;
- q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node);
+ if (!lim)
+ lim = &default_lim;
+ lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
+ if (blk_mq_can_poll(set))
+ lim->features |= BLK_FEAT_POLL;
+
+ q = blk_alloc_queue(lim, set->numa_node);
if (IS_ERR(q))
return q;
q->queuedata = queuedata;
@@ -4274,17 +4301,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}
-static void blk_mq_update_poll_flag(struct request_queue *q)
-{
- struct blk_mq_tag_set *set = q->tag_set;
-
- if (set->nr_maps > HCTX_TYPE_POLL &&
- set->map[HCTX_TYPE_POLL].nr_queues)
- blk_queue_flag_set(QUEUE_FLAG_POLL, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
-}
-
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q)
{
@@ -4312,7 +4328,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->tag_set = set;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
- blk_mq_update_poll_flag(q);
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
INIT_LIST_HEAD(&q->flush_list);
@@ -4636,13 +4651,15 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
int ret;
unsigned long i;
+ if (WARN_ON_ONCE(!q->mq_freeze_depth))
+ return -EINVAL;
+
if (!set)
return -EINVAL;
if (q->nr_requests == nr)
return 0;
- blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
ret = 0;
@@ -4676,7 +4693,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
}
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
return ret;
}
@@ -4798,8 +4814,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ struct queue_limits lim;
+
blk_mq_realloc_hw_ctxs(set, q);
- blk_mq_update_poll_flag(q);
+
if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues;
@@ -4811,6 +4829,13 @@ fallback:
set->nr_hw_queues = prev_nr_hw_queues;
goto fallback;
}
+ lim = queue_limits_start_update(q);
+ if (blk_mq_can_poll(set))
+ lim.features |= BLK_FEAT_POLL;
+ else
+ lim.features &= ~BLK_FEAT_POLL;
+ if (queue_limits_commit_update(q, &lim) < 0)
+ pr_warn("updating the poll flag failed\n");
blk_mq_map_swqueue(q);
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index effeb9a639bb..cd8a8eabc9a5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -6,7 +6,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
-#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
#include <linux/pagemap.h>
#include <linux/backing-dev-defs.h>
#include <linux/gcd.h>
@@ -55,7 +55,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
}
EXPORT_SYMBOL(blk_set_stacking_limits);
-static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+void blk_apply_bdi_limits(struct backing_dev_info *bdi,
struct queue_limits *lim)
{
/*
@@ -68,7 +68,7 @@ static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
static int blk_validate_zoned_limits(struct queue_limits *lim)
{
- if (!lim->zoned) {
+ if (!(lim->features & BLK_FEAT_ZONED)) {
if (WARN_ON_ONCE(lim->max_open_zones) ||
WARN_ON_ONCE(lim->max_active_zones) ||
WARN_ON_ONCE(lim->zone_write_granularity) ||
@@ -80,6 +80,14 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
return -EINVAL;
+ /*
+ * Given that active zones include open zones, the maximum number of
+ * open zones cannot be larger than the maximum number of active zones.
+ */
+ if (lim->max_active_zones &&
+ lim->max_open_zones > lim->max_active_zones)
+ return -EINVAL;
+
if (lim->zone_write_granularity < lim->logical_block_size)
lim->zone_write_granularity = lim->logical_block_size;
@@ -97,6 +105,120 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
return 0;
}
+static int blk_validate_integrity_limits(struct queue_limits *lim)
+{
+ struct blk_integrity *bi = &lim->integrity;
+
+ if (!bi->tuple_size) {
+ if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE ||
+ bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) {
+ pr_warn("invalid PI settings.\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
+ pr_warn("integrity support disabled.\n");
+ return -EINVAL;
+ }
+
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE &&
+ (bi->flags & BLK_INTEGRITY_REF_TAG)) {
+ pr_warn("ref tag not supported without checksum.\n");
+ return -EINVAL;
+ }
+
+ if (!bi->interval_exp)
+ bi->interval_exp = ilog2(lim->logical_block_size);
+
+ return 0;
+}
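
The interval_exp default is the log2 of the logical block size, i.e. the protection information interval is one logical block unless the driver says otherwise. A runnable sketch of that derivation, with ilog2() reimplemented for userspace:

#include <stdio.h>

/* ilog2() equivalent: position of the highest set bit (input >= 1) */
static unsigned int ilog2u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	/* a 4096-byte logical block yields a default PI interval
	 * exponent of 12, i.e. a 2^12-byte interval */
	printf("%u\n", ilog2u(4096)); /* prints 12 */
	return 0;
}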
+
+/*
+ * Returns max guaranteed bytes which we can fit in a bio.
+ *
+ * We request that an atomic_write is an ITER_UBUF iov_iter (so a single
+ * vector), so we assume that at least PAGE_SIZE fits in each segment,
+ * apart from the first and last segments.
+ */
+static unsigned int blk_queue_max_guaranteed_bio(struct queue_limits *lim)
+{
+ unsigned int max_segments = min(BIO_MAX_VECS, lim->max_segments);
+ unsigned int length;
+
+ length = min(max_segments, 2) * lim->logical_block_size;
+ if (max_segments > 2)
+ length += (max_segments - 2) * PAGE_SIZE;
+
+ return length;
+}
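
A worked example of the guarantee (runnable userspace C; BIO_MAX_VECS is assumed to be 256, its current value, and the block and page sizes are illustrative):

#include <stdio.h>

/* mirrors blk_queue_max_guaranteed_bio(): two segments are only
 * guaranteed a logical block each, the rest a full page */
static unsigned int guaranteed(unsigned int max_segments,
			       unsigned int lbs, unsigned int page)
{
	unsigned int segs = max_segments < 256 ? max_segments : 256;
	unsigned int len = (segs < 2 ? segs : 2) * lbs;

	if (segs > 2)
		len += (segs - 2) * page;
	return len;
}

int main(void)
{
	/* 128 segments, 512-byte blocks, 4 KiB pages:
	 * 2 * 512 + 126 * 4096 = 517120 bytes */
	printf("%u\n", guaranteed(128, 512, 4096)); /* prints 517120 */
	return 0;
}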
+
+static void blk_atomic_writes_update_limits(struct queue_limits *lim)
+{
+ unsigned int unit_limit = min(lim->max_hw_sectors << SECTOR_SHIFT,
+ blk_queue_max_guaranteed_bio(lim));
+
+ unit_limit = rounddown_pow_of_two(unit_limit);
+
+ lim->atomic_write_max_sectors =
+ min(lim->atomic_write_hw_max >> SECTOR_SHIFT,
+ lim->max_hw_sectors);
+ lim->atomic_write_unit_min =
+ min(lim->atomic_write_hw_unit_min, unit_limit);
+ lim->atomic_write_unit_max =
+ min(lim->atomic_write_hw_unit_max, unit_limit);
+ lim->atomic_write_boundary_sectors =
+ lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
+}
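
Continuing the example above: the unit limit is the guaranteed bio size rounded down to a power of two, so 517120 bytes of guaranteed space yields a 256 KiB atomic write unit (runnable sketch; rounddown_pow_of_two() reimplemented for userspace, input assumed >= 1):

#include <stdio.h>

/* round down to a power of two, as rounddown_pow_of_two() does */
static unsigned int rd_pow2(unsigned int v)
{
	unsigned int p = 1;

	while (p <= v / 2)
		p <<= 1;
	return p;
}

int main(void)
{
	/* 517120 rounds down to 262144 (256 KiB) */
	printf("%u\n", rd_pow2(517120)); /* prints 262144 */
	return 0;
}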
+
+static void blk_validate_atomic_write_limits(struct queue_limits *lim)
+{
+ unsigned int boundary_sectors;
+
+ if (!lim->atomic_write_hw_max)
+ goto unsupported;
+
+ boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
+
+ if (boundary_sectors) {
+ /*
+ * A feature of boundary support is that it disallows merging
+ * bios when the resulting request would cross either a chunk
+ * sector or atomic write HW boundary, even though chunk sectors
+ * may just be set for performance.
+ * For simplicity, disallow atomic writes when chunk_sectors is
+ * non-zero and smaller than the atomic write HW boundary.
+ * Furthermore, chunk sectors must be a multiple of atomic
+ * write HW boundary. Otherwise boundary support becomes
+ * complicated.
+ * Devices which do not conform to these rules can be dealt
+ * with if and when they show up.
+ */
+ if (WARN_ON_ONCE(lim->chunk_sectors % boundary_sectors))
+ goto unsupported;
+
+ /*
+ * The boundary size just needs to be a multiple of unit_max
+ * (and not necessarily a power-of-2), so this following check
+ * could be relaxed in future.
+ * Furthermore, if needed, unit_max could even be reduced so
+ * that it is compliant with a !power-of-2 boundary.
+ */
+ if (!is_power_of_2(boundary_sectors))
+ goto unsupported;
+ }
+
+ blk_atomic_writes_update_limits(lim);
+ return;
+
+unsupported:
+ lim->atomic_write_max_sectors = 0;
+ lim->atomic_write_boundary_sectors = 0;
+ lim->atomic_write_unit_min = 0;
+ lim->atomic_write_unit_max = 0;
+}
+
/*
* Check that the limits in lim are valid, initialize defaults for unset
* values, and cap values based on others where needed.
@@ -105,6 +227,7 @@ static int blk_validate_limits(struct queue_limits *lim)
{
unsigned int max_hw_sectors;
unsigned int logical_block_sectors;
+ int err;
/*
* Unless otherwise specified, default to 512 byte logical blocks and a
@@ -112,6 +235,10 @@ static int blk_validate_limits(struct queue_limits *lim)
*/
if (!lim->logical_block_size)
lim->logical_block_size = SECTOR_SIZE;
+ else if (blk_validate_block_size(lim->logical_block_size)) {
+ pr_warn("Invalid logical block size (%d)\n", lim->logical_block_size);
+ return -EINVAL;
+ }
if (lim->physical_block_size < lim->logical_block_size)
lim->physical_block_size = lim->logical_block_size;
@@ -153,6 +280,12 @@ static int blk_validate_limits(struct queue_limits *lim)
if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
return -EINVAL;
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
+ } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
+ lim->max_sectors =
+ min(max_hw_sectors, lim->io_opt >> SECTOR_SHIFT);
+ } else if (lim->io_min > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
+ lim->max_sectors =
+ min(max_hw_sectors, lim->io_min >> SECTOR_SHIFT);
} else {
lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
}
@@ -220,9 +353,17 @@ static int blk_validate_limits(struct queue_limits *lim)
if (lim->alignment_offset) {
lim->alignment_offset &= (lim->physical_block_size - 1);
- lim->misaligned = 0;
+ lim->flags &= ~BLK_FLAG_MISALIGNED;
}
+ if (!(lim->features & BLK_FEAT_WRITE_CACHE))
+ lim->features &= ~BLK_FEAT_FUA;
+
+ blk_validate_atomic_write_limits(lim);
+
+ err = blk_validate_integrity_limits(lim);
+ if (err)
+ return err;
return blk_validate_zoned_limits(lim);
}
@@ -254,15 +395,25 @@ int blk_set_default_limits(struct queue_limits *lim)
*/
int queue_limits_commit_update(struct request_queue *q,
struct queue_limits *lim)
- __releases(q->limits_lock)
{
- int error = blk_validate_limits(lim);
+ int error;
- if (!error) {
- q->limits = *lim;
- if (q->disk)
- blk_apply_bdi_limits(q->disk->bdi, lim);
+ error = blk_validate_limits(lim);
+ if (error)
+ goto out_unlock;
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ if (q->crypto_profile && lim->integrity.tag_size) {
+ pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together.\n");
+ error = -EINVAL;
+ goto out_unlock;
}
+#endif
+
+ q->limits = *lim;
+ if (q->disk)
+ blk_apply_bdi_limits(q->disk->bdi, lim);
+out_unlock:
mutex_unlock(&q->limits_lock);
return error;
}
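
For reference, a hedged sketch of the start_update/commit_update pattern that the rest of this series converts callers to; the field values here are hypothetical and the error handling is minimal:

	struct queue_limits lim;
	int err;

	lim = queue_limits_start_update(q);	/* takes q->limits_lock */
	lim.logical_block_size = 4096;		/* hypothetical new values */
	lim.features |= BLK_FEAT_WRITE_CACHE;
	err = queue_limits_commit_update(q, &lim); /* validates, applies,
						      drops limits_lock */
	if (err)
		return err;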
@@ -287,204 +438,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
EXPORT_SYMBOL_GPL(queue_limits_set);
/**
- * blk_queue_chunk_sectors - set size of the chunk for this queue
- * @q: the request queue for the device
- * @chunk_sectors: chunk sectors in the usual 512b unit
- *
- * Description:
- * If a driver doesn't want IOs to cross a given chunk size, it can set
- * this limit and prevent merging across chunks. Note that the block layer
- * must accept a page worth of data at any offset. So if the crossing of
- * chunks is a hard limitation in the driver, it must still be prepared
- * to split single page bios.
- **/
-void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
-{
- q->limits.chunk_sectors = chunk_sectors;
-}
-EXPORT_SYMBOL(blk_queue_chunk_sectors);
-
-/**
- * blk_queue_max_discard_sectors - set max sectors for a single discard
- * @q: the request queue for the device
- * @max_discard_sectors: maximum number of sectors to discard
- **/
-void blk_queue_max_discard_sectors(struct request_queue *q,
- unsigned int max_discard_sectors)
-{
- struct queue_limits *lim = &q->limits;
-
- lim->max_hw_discard_sectors = max_discard_sectors;
- lim->max_discard_sectors =
- min(max_discard_sectors, lim->max_user_discard_sectors);
-}
-EXPORT_SYMBOL(blk_queue_max_discard_sectors);
-
-/**
- * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
- * @q: the request queue for the device
- * @max_sectors: maximum number of sectors to secure_erase
- **/
-void blk_queue_max_secure_erase_sectors(struct request_queue *q,
- unsigned int max_sectors)
-{
- q->limits.max_secure_erase_sectors = max_sectors;
-}
-EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
-
-/**
- * blk_queue_max_write_zeroes_sectors - set max sectors for a single
- * write zeroes
- * @q: the request queue for the device
- * @max_write_zeroes_sectors: maximum number of sectors to write per command
- **/
-void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
- unsigned int max_write_zeroes_sectors)
-{
- q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
-}
-EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
-
-/**
- * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
- * @q: the request queue for the device
- * @max_zone_append_sectors: maximum number of sectors to write per command
- *
- * Sets the maximum number of sectors allowed for zone append commands. If
- * Specifying 0 for @max_zone_append_sectors indicates that the queue does
- * not natively support zone append operations and that the block layer must
- * emulate these operations using regular writes.
- **/
-void blk_queue_max_zone_append_sectors(struct request_queue *q,
- unsigned int max_zone_append_sectors)
-{
- unsigned int max_sectors = 0;
-
- if (WARN_ON(!blk_queue_is_zoned(q)))
- return;
-
- if (max_zone_append_sectors) {
- max_sectors = min(q->limits.max_hw_sectors,
- max_zone_append_sectors);
- max_sectors = min(q->limits.chunk_sectors, max_sectors);
-
- /*
- * Signal eventual driver bugs resulting in the max_zone_append
- * sectors limit being 0 due to the chunk_sectors limit (zone
- * size) not set or the max_hw_sectors limit not set.
- */
- WARN_ON_ONCE(!max_sectors);
- }
-
- q->limits.max_zone_append_sectors = max_sectors;
-}
-EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
-
-/**
- * blk_queue_logical_block_size - set logical block size for the queue
- * @q: the request queue for the device
- * @size: the logical block size, in bytes
- *
- * Description:
- * This should be set to the lowest possible block size that the
- * storage device can address. The default of 512 covers most
- * hardware.
- **/
-void blk_queue_logical_block_size(struct request_queue *q, unsigned int size)
-{
- struct queue_limits *limits = &q->limits;
-
- limits->logical_block_size = size;
-
- if (limits->discard_granularity < limits->logical_block_size)
- limits->discard_granularity = limits->logical_block_size;
-
- if (limits->physical_block_size < size)
- limits->physical_block_size = size;
-
- if (limits->io_min < limits->physical_block_size)
- limits->io_min = limits->physical_block_size;
-
- limits->max_hw_sectors =
- round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT);
- limits->max_sectors =
- round_down(limits->max_sectors, size >> SECTOR_SHIFT);
-}
-EXPORT_SYMBOL(blk_queue_logical_block_size);
-
-/**
- * blk_queue_physical_block_size - set physical block size for the queue
- * @q: the request queue for the device
- * @size: the physical block size, in bytes
- *
- * Description:
- * This should be set to the lowest possible sector size that the
- * hardware can operate on without reverting to read-modify-write
- * operations.
- */
-void blk_queue_physical_block_size(struct request_queue *q, unsigned int size)
-{
- q->limits.physical_block_size = size;
-
- if (q->limits.physical_block_size < q->limits.logical_block_size)
- q->limits.physical_block_size = q->limits.logical_block_size;
-
- if (q->limits.discard_granularity < q->limits.physical_block_size)
- q->limits.discard_granularity = q->limits.physical_block_size;
-
- if (q->limits.io_min < q->limits.physical_block_size)
- q->limits.io_min = q->limits.physical_block_size;
-}
-EXPORT_SYMBOL(blk_queue_physical_block_size);
-
-/**
- * blk_queue_zone_write_granularity - set zone write granularity for the queue
- * @q: the request queue for the zoned device
- * @size: the zone write granularity size, in bytes
- *
- * Description:
- * This should be set to the lowest possible size allowing to write in
- * sequential zones of a zoned block device.
- */
-void blk_queue_zone_write_granularity(struct request_queue *q,
- unsigned int size)
-{
- if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
- return;
-
- q->limits.zone_write_granularity = size;
-
- if (q->limits.zone_write_granularity < q->limits.logical_block_size)
- q->limits.zone_write_granularity = q->limits.logical_block_size;
-}
-EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity);
-
-/**
- * blk_queue_alignment_offset - set physical block alignment offset
- * @q: the request queue for the device
- * @offset: alignment offset in bytes
- *
- * Description:
- * Some devices are naturally misaligned to compensate for things like
- * the legacy DOS partition table 63-sector offset. Low-level drivers
- * should call this function for devices whose first sector is not
- * naturally aligned.
- */
-void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
-{
- q->limits.alignment_offset =
- offset & (q->limits.physical_block_size - 1);
- q->limits.misaligned = 0;
-}
-EXPORT_SYMBOL(blk_queue_alignment_offset);
-
-void disk_update_readahead(struct gendisk *disk)
-{
- blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
-}
-EXPORT_SYMBOL_GPL(disk_update_readahead);
-
-/**
* blk_limits_io_min - set minimum request size for a device
* @limits: the queue limits
* @min: smallest I/O size in bytes
@@ -508,26 +461,6 @@ void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
EXPORT_SYMBOL(blk_limits_io_min);
/**
- * blk_queue_io_min - set minimum request size for the queue
- * @q: the request queue for the device
- * @min: smallest I/O size in bytes
- *
- * Description:
- * Storage devices may report a granularity or preferred minimum I/O
- * size which is the smallest request the device can perform without
- * incurring a performance penalty. For disk drives this is often the
- * physical block size. For RAID arrays it is often the stripe chunk
- * size. A properly aligned multiple of minimum_io_size is the
- * preferred request size for workloads where a high number of I/O
- * operations is desired.
- */
-void blk_queue_io_min(struct request_queue *q, unsigned int min)
-{
- blk_limits_io_min(&q->limits, min);
-}
-EXPORT_SYMBOL(blk_queue_io_min);
-
-/**
* blk_limits_io_opt - set optimal request size for a device
* @limits: the queue limits
* @opt: smallest I/O size in bytes
@@ -614,6 +547,21 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
{
unsigned int top, bottom, alignment, ret = 0;
+ t->features |= (b->features & BLK_FEAT_INHERIT_MASK);
+
+ /*
+ * BLK_FEAT_NOWAIT and BLK_FEAT_POLL need to be supported both by the
+ * stacking driver and all underlying devices. The stacking driver sets
+ * the flags before stacking the limits, and this will clear the flags
+ * if any of the underlying devices does not support it.
+ */
+ if (!(b->features & BLK_FEAT_NOWAIT))
+ t->features &= ~BLK_FEAT_NOWAIT;
+ if (!(b->features & BLK_FEAT_POLL))
+ t->features &= ~BLK_FEAT_POLL;
+
+ t->flags |= (b->flags & BLK_FLAG_MISALIGNED);
+
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_user_sectors = min_not_zero(t->max_user_sectors,
b->max_user_sectors);
@@ -623,7 +571,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_write_zeroes_sectors);
t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t),
queue_limits_max_zone_append_sectors(b));
- t->bounce = max(t->bounce, b->bounce);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
b->seg_boundary_mask);
@@ -639,8 +586,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
- t->misaligned |= b->misaligned;
-
alignment = queue_limit_alignment_offset(b, start);
/* Bottom device has different alignment. Check that it is
@@ -654,7 +599,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
/* Verify that top and bottom intervals line up */
if (max(top, bottom) % min(top, bottom)) {
- t->misaligned = 1;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
}
@@ -676,42 +621,38 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
/* Physical block size a multiple of the logical block size? */
if (t->physical_block_size & (t->logical_block_size - 1)) {
t->physical_block_size = t->logical_block_size;
- t->misaligned = 1;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
/* Minimum I/O a multiple of the physical block size? */
if (t->io_min & (t->physical_block_size - 1)) {
t->io_min = t->physical_block_size;
- t->misaligned = 1;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
/* Optimal I/O a multiple of the physical block size? */
if (t->io_opt & (t->physical_block_size - 1)) {
t->io_opt = 0;
- t->misaligned = 1;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
/* chunk_sectors a multiple of the physical block size? */
if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
t->chunk_sectors = 0;
- t->misaligned = 1;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
- t->raid_partial_stripes_expensive =
- max(t->raid_partial_stripes_expensive,
- b->raid_partial_stripes_expensive);
-
/* Find lowest common alignment_offset */
t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment)
% max(t->physical_block_size, t->io_min);
/* Verify that new alignment_offset is on a logical block boundary */
if (t->alignment_offset & (t->logical_block_size - 1)) {
- t->misaligned = 1;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
@@ -723,16 +664,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
if (b->discard_granularity) {
alignment = queue_limit_discard_alignment(b, start);
- if (t->discard_granularity != 0 &&
- t->discard_alignment != alignment) {
- top = t->discard_granularity + t->discard_alignment;
- bottom = b->discard_granularity + alignment;
-
- /* Verify that top and bottom intervals line up */
- if ((max(top, bottom) % min(top, bottom)) != 0)
- t->discard_misaligned = 1;
- }
-
t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
b->max_discard_sectors);
t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
@@ -746,8 +677,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_secure_erase_sectors);
t->zone_write_granularity = max(t->zone_write_granularity,
b->zone_write_granularity);
- t->zoned = max(t->zoned, b->zoned);
- if (!t->zoned) {
+ if (!(t->features & BLK_FEAT_ZONED)) {
t->zone_write_granularity = 0;
t->max_zone_append_sectors = 0;
}
@@ -781,21 +711,65 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);
/**
- * blk_queue_update_dma_pad - update pad mask
- * @q: the request queue for the device
- * @mask: pad mask
+ * queue_limits_stack_integrity - stack integrity profile
+ * @t: target queue limits
+ * @b: base queue limits
*
- * Update dma pad mask.
+ * Check if the integrity profile in @b can be stacked into the
+ * target @t. Stacking is possible if either:
*
- * Appending pad buffer to a request modifies the last entry of a
- * scatter list such that it includes the pad buffer.
- **/
-void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
-{
- if (mask > q->dma_pad_mask)
- q->dma_pad_mask = mask;
+ * a) @t does not have any integrity information stacked into it yet
+ * b) the integrity profile in @b is identical to the one in @t
+ *
+ * If @b can be stacked into @t, return %true. Else return %false and clear the
+ * integrity information in @t.
+ */
+bool queue_limits_stack_integrity(struct queue_limits *t,
+ struct queue_limits *b)
+{
+ struct blk_integrity *ti = &t->integrity;
+ struct blk_integrity *bi = &b->integrity;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return true;
+
+ if (!ti->tuple_size) {
+ /* inherit the settings from the first underlying device */
+ if (!(ti->flags & BLK_INTEGRITY_STACKED)) {
+ ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE |
+ (bi->flags & BLK_INTEGRITY_REF_TAG);
+ ti->csum_type = bi->csum_type;
+ ti->tuple_size = bi->tuple_size;
+ ti->pi_offset = bi->pi_offset;
+ ti->interval_exp = bi->interval_exp;
+ ti->tag_size = bi->tag_size;
+ goto done;
+ }
+ if (!bi->tuple_size)
+ goto done;
+ }
+
+ if (ti->tuple_size != bi->tuple_size)
+ goto incompatible;
+ if (ti->interval_exp != bi->interval_exp)
+ goto incompatible;
+ if (ti->tag_size != bi->tag_size)
+ goto incompatible;
+ if (ti->csum_type != bi->csum_type)
+ goto incompatible;
+ if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
+ (bi->flags & BLK_INTEGRITY_REF_TAG))
+ goto incompatible;
+
+done:
+ ti->flags |= BLK_INTEGRITY_STACKED;
+ return true;
+
+incompatible:
+ memset(ti, 0, sizeof(*ti));
+ return false;
}
-EXPORT_SYMBOL(blk_queue_update_dma_pad);
+EXPORT_SYMBOL_GPL(queue_limits_stack_integrity);
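
A hedged sketch of how a stacking driver might call this while building a table; the member list and field names here are hypothetical:

	/* fold each member device's integrity profile into the top-level
	 * limits; on the first mismatch t_lim.integrity is cleared and the
	 * stacked device simply advertises no PI support */
	list_for_each_entry(m, &members, list) {
		if (!queue_limits_stack_integrity(&t_lim, &m->member_limits))
			break;
	}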
/**
* blk_set_queue_depth - tell the block layer about the device queue depth
@@ -810,54 +784,11 @@ void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
}
EXPORT_SYMBOL(blk_set_queue_depth);
-/**
- * blk_queue_write_cache - configure queue's write cache
- * @q: the request queue for the device
- * @wc: write back cache on or off
- * @fua: device supports FUA writes, if true
- *
- * Tell the block layer about the write cache of @q.
- */
-void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
-{
- if (wc) {
- blk_queue_flag_set(QUEUE_FLAG_HW_WC, q);
- blk_queue_flag_set(QUEUE_FLAG_WC, q);
- } else {
- blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q);
- blk_queue_flag_clear(QUEUE_FLAG_WC, q);
- }
- if (fua)
- blk_queue_flag_set(QUEUE_FLAG_FUA, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
-}
-EXPORT_SYMBOL_GPL(blk_queue_write_cache);
-
-/**
- * disk_set_zoned - inidicate a zoned device
- * @disk: gendisk to configure
- */
-void disk_set_zoned(struct gendisk *disk)
-{
- struct request_queue *q = disk->queue;
-
- WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED));
-
- /*
- * Set the zone write granularity to the device logical block
- * size by default. The driver can change this value if needed.
- */
- q->limits.zoned = true;
- blk_queue_zone_write_granularity(q, queue_logical_block_size(q));
-}
-EXPORT_SYMBOL_GPL(disk_set_zoned);
-
int bdev_alignment_offset(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- if (q->limits.misaligned)
+ if (q->limits.flags & BLK_FLAG_MISALIGNED)
return -1;
if (bdev_is_partition(bdev))
return queue_limit_alignment_offset(&q->limits,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f0f9314ab65c..60116d13cb80 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -22,8 +22,8 @@
struct queue_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct request_queue *, char *);
- ssize_t (*store)(struct request_queue *, const char *, size_t);
+ ssize_t (*show)(struct gendisk *disk, char *page);
+ ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
};
static ssize_t
@@ -47,18 +47,18 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
-static ssize_t queue_requests_show(struct request_queue *q, char *page)
+static ssize_t queue_requests_show(struct gendisk *disk, char *page)
{
- return queue_var_show(q->nr_requests, page);
+ return queue_var_show(disk->queue->nr_requests, page);
}
static ssize_t
-queue_requests_store(struct request_queue *q, const char *page, size_t count)
+queue_requests_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long nr;
int ret, err;
- if (!queue_is_mq(q))
+ if (!queue_is_mq(disk->queue))
return -EINVAL;
ret = queue_var_store(&nr, page, count);
@@ -68,111 +68,91 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
- err = blk_mq_update_nr_requests(q, nr);
+ err = blk_mq_update_nr_requests(disk->queue, nr);
if (err)
return err;
return ret;
}
-static ssize_t queue_ra_show(struct request_queue *q, char *page)
+static ssize_t queue_ra_show(struct gendisk *disk, char *page)
{
- unsigned long ra_kb;
-
- if (!q->disk)
- return -EINVAL;
- ra_kb = q->disk->bdi->ra_pages << (PAGE_SHIFT - 10);
- return queue_var_show(ra_kb, page);
+ return queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
}
static ssize_t
-queue_ra_store(struct request_queue *q, const char *page, size_t count)
+queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long ra_kb;
ssize_t ret;
- if (!q->disk)
- return -EINVAL;
ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
- q->disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
+ disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
return ret;
}
-static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
-{
- int max_sectors_kb = queue_max_sectors(q) >> 1;
-
- return queue_var_show(max_sectors_kb, page);
-}
-
-static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_max_segments(q), page);
-}
-
-static ssize_t queue_max_discard_segments_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_max_discard_segments(q), page);
-}
-
-static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.max_integrity_segments, page);
-}
-
-static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_max_segment_size(q), page);
-}
-
-static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_logical_block_size(q), page);
-}
-
-static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_physical_block_size(q), page);
-}
-
-static ssize_t queue_chunk_sectors_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.chunk_sectors, page);
-}
-
-static ssize_t queue_io_min_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_io_min(q), page);
+#define QUEUE_SYSFS_LIMIT_SHOW(_field) \
+static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
+{ \
+ return queue_var_show(disk->queue->limits._field, page); \
+}
+
+QUEUE_SYSFS_LIMIT_SHOW(max_segments)
+QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
+QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
+QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
+QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
+QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
+QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
+QUEUE_SYSFS_LIMIT_SHOW(io_min)
+QUEUE_SYSFS_LIMIT_SHOW(io_opt)
+QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
+QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
+QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
+QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
+QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
+QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
+QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
+QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)
+
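For illustration, this is what one instantiation of the macro expands to (derived mechanically from the definition above):

static ssize_t queue_io_min_show(struct gendisk *disk, char *page)
{
	return queue_var_show(disk->queue->limits.io_min, page);
}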
+#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \
+static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%llu\n", \
+ (unsigned long long)disk->queue->limits._field << \
+ SECTOR_SHIFT); \
}
-static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_io_opt(q), page);
-}
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
-static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.discard_granularity, page);
+#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \
+static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
+{ \
+ return queue_var_show(disk->queue->limits._field >> 1, page); \
}
-static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
-{
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
- return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_hw_discard_sectors << 9);
+#define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
+static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%d\n", _val); \
}
-static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
-{
- return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_discard_sectors << 9);
-}
+/* deprecated fields */
+QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
+QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
+QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
-static ssize_t queue_discard_max_store(struct request_queue *q,
- const char *page, size_t count)
+static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
+ const char *page, size_t count)
{
unsigned long max_discard_bytes;
struct queue_limits lim;
@@ -183,54 +163,34 @@ static ssize_t queue_discard_max_store(struct request_queue *q,
if (ret < 0)
return ret;
- if (max_discard_bytes & (q->limits.discard_granularity - 1))
+ if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
return -EINVAL;
if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
return -EINVAL;
- blk_mq_freeze_queue(q);
- lim = queue_limits_start_update(q);
+ lim = queue_limits_start_update(disk->queue);
lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
- err = queue_limits_commit_update(q, &lim);
- blk_mq_unfreeze_queue(q);
-
+ err = queue_limits_commit_update(disk->queue, &lim);
if (err)
return err;
return ret;
}
-static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
-{
- return queue_var_show(0, page);
-}
-
-static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
-{
- return queue_var_show(0, page);
-}
-
-static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
+/*
+ * For zone append, queue_max_zone_append_sectors() does not just return
+ * the underlying queue limit but actually contains a calculation. Because of
+ * that we can't simply use QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES here.
+ */
+static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page)
{
return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_write_zeroes_sectors << 9);
-}
-
-static ssize_t queue_zone_write_granularity_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_zone_write_granularity(q), page);
-}
-
-static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
-{
- unsigned long long max_sectors = queue_max_zone_append_sectors(q);
-
- return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
+ (u64)queue_max_zone_append_sectors(disk->queue) <<
+ SECTOR_SHIFT);
}
static ssize_t
-queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
+queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long max_sectors_kb;
struct queue_limits lim;
@@ -241,94 +201,83 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
if (ret < 0)
return ret;
- blk_mq_freeze_queue(q);
- lim = queue_limits_start_update(q);
+ lim = queue_limits_start_update(disk->queue);
lim.max_user_sectors = max_sectors_kb << 1;
- err = queue_limits_commit_update(q, &lim);
- blk_mq_unfreeze_queue(q);
+ err = queue_limits_commit_update(disk->queue, &lim);
if (err)
return err;
return ret;
}
-static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
+static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
+ size_t count, blk_features_t feature)
{
- int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
-
- return queue_var_show(max_hw_sectors_kb, page);
-}
+ struct queue_limits lim;
+ unsigned long val;
+ ssize_t ret;
-static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.virt_boundary_mask, page);
-}
+ ret = queue_var_store(&val, page, count);
+ if (ret < 0)
+ return ret;
-static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_dma_alignment(q), page);
+ lim = queue_limits_start_update(disk->queue);
+ if (val)
+ lim.features |= feature;
+ else
+ lim.features &= ~feature;
+ ret = queue_limits_commit_update(disk->queue, &lim);
+ if (ret)
+ return ret;
+ return count;
}
-#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
-static ssize_t \
-queue_##name##_show(struct request_queue *q, char *page) \
+#define QUEUE_SYSFS_FEATURE(_name, _feature) \
+static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
{ \
- int bit; \
- bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \
- return queue_var_show(neg ? !bit : bit, page); \
+ return sprintf(page, "%u\n", \
+ !!(disk->queue->limits.features & _feature)); \
} \
-static ssize_t \
-queue_##name##_store(struct request_queue *q, const char *page, size_t count) \
+static ssize_t queue_##_name##_store(struct gendisk *disk, \
+ const char *page, size_t count) \
{ \
- unsigned long val; \
- ssize_t ret; \
- ret = queue_var_store(&val, page, count); \
- if (ret < 0) \
- return ret; \
- if (neg) \
- val = !val; \
- \
- if (val) \
- blk_queue_flag_set(QUEUE_FLAG_##flag, q); \
- else \
- blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \
- return ret; \
-}
-
-QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
-QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
-QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
-QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
-#undef QUEUE_SYSFS_BIT_FNS
-
-static ssize_t queue_zoned_show(struct request_queue *q, char *page)
-{
- if (blk_queue_is_zoned(q))
- return sprintf(page, "host-managed\n");
- return sprintf(page, "none\n");
+ return queue_feature_store(disk, page, count, _feature); \
}
-static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
-{
- return queue_var_show(disk_nr_zones(q->disk), page);
+QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
+QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
+QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
+QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
+
+#define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \
+static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%u\n", \
+ !!(disk->queue->limits.features & _feature)); \
}
-static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+QUEUE_SYSFS_FEATURE_SHOW(poll, BLK_FEAT_POLL);
+QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
+QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
+
+static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
{
- return queue_var_show(bdev_max_open_zones(q->disk->part0), page);
+ if (blk_queue_is_zoned(disk->queue))
+ return sprintf(page, "host-managed\n");
+ return sprintf(page, "none\n");
}
-static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page)
+static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
{
- return queue_var_show(bdev_max_active_zones(q->disk->part0), page);
+ return queue_var_show(disk_nr_zones(disk), page);
}
-static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
+static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
- return queue_var_show((blk_queue_nomerges(q) << 1) |
- blk_queue_noxmerges(q), page);
+ return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
+ blk_queue_noxmerges(disk->queue), page);
}
-static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
+static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned long nm;
@@ -337,29 +286,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;
- blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
- blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, disk->queue);
if (nm == 2)
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, disk->queue);
else if (nm)
- blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, disk->queue);
return ret;
}
-static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
{
- bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
- bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
+ bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
+ bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
return queue_var_show(set << force, page);
}
static ssize_t
-queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#ifdef CONFIG_SMP
+ struct request_queue *q = disk->queue;
unsigned long val;
ret = queue_var_store(&val, page, count);
@@ -380,38 +330,28 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
-{
- return sprintf(page, "%d\n", -1);
-}
-
-static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
+static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
size_t count)
{
return count;
}
-static ssize_t queue_poll_show(struct request_queue *q, char *page)
-{
- return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
-}
-
-static ssize_t queue_poll_store(struct request_queue *q, const char *page,
+static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
size_t count)
{
- if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ if (!(disk->queue->limits.features & BLK_FEAT_POLL))
return -EINVAL;
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n");
return count;
}
-static ssize_t queue_io_timeout_show(struct request_queue *q, char *page)
+static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
{
- return sprintf(page, "%u\n", jiffies_to_msecs(q->rq_timeout));
+ return sprintf(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
}
-static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
+static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned int val;
@@ -421,46 +361,45 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
if (err || val == 0)
return -EINVAL;
- blk_queue_rq_timeout(q, msecs_to_jiffies(val));
+ blk_queue_rq_timeout(disk->queue, msecs_to_jiffies(val));
return count;
}
-static ssize_t queue_wc_show(struct request_queue *q, char *page)
+static ssize_t queue_wc_show(struct gendisk *disk, char *page)
{
- if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (blk_queue_write_cache(disk->queue))
return sprintf(page, "write back\n");
-
return sprintf(page, "write through\n");
}
-static ssize_t queue_wc_store(struct request_queue *q, const char *page,
+static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
size_t count)
{
+ struct queue_limits lim;
+ bool disable;
+ int err;
+
if (!strncmp(page, "write back", 10)) {
- if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags))
- return -EINVAL;
- blk_queue_flag_set(QUEUE_FLAG_WC, q);
+ disable = false;
} else if (!strncmp(page, "write through", 13) ||
- !strncmp(page, "none", 4)) {
- blk_queue_flag_clear(QUEUE_FLAG_WC, q);
+ !strncmp(page, "none", 4)) {
+ disable = true;
} else {
return -EINVAL;
}
+ lim = queue_limits_start_update(disk->queue);
+ if (disable)
+ lim.flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
+ else
+ lim.flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
+ err = queue_limits_commit_update(disk->queue, &lim);
+ if (err)
+ return err;
return count;
}
-static ssize_t queue_fua_show(struct request_queue *q, char *page)
-{
- return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
-}
-
-static ssize_t queue_dax_show(struct request_queue *q, char *page)
-{
- return queue_var_show(blk_queue_dax(q), page);
-}
-
#define QUEUE_RO_ENTRY(_prefix, _name) \
static struct queue_sysfs_entry _prefix##_entry = { \
.attr = { .name = _name, .mode = 0444 }, \
@@ -491,12 +430,18 @@ QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
-QUEUE_RO_ENTRY(queue_discard_max_hw, "discard_max_hw_bytes");
-QUEUE_RW_ENTRY(queue_discard_max, "discard_max_bytes");
+QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
+QUEUE_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
+QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
+QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
+ "atomic_write_boundary_bytes");
+QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
+QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
+
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
-QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
+QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
@@ -522,9 +467,9 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
.show = queue_logical_block_size_show,
};
-QUEUE_RW_ENTRY(queue_nonrot, "rotational");
+QUEUE_RW_ENTRY(queue_rotational, "rotational");
QUEUE_RW_ENTRY(queue_iostats, "iostats");
-QUEUE_RW_ENTRY(queue_random, "add_random");
+QUEUE_RW_ENTRY(queue_add_random, "add_random");
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
#ifdef CONFIG_BLK_WBT
@@ -541,20 +486,22 @@ static ssize_t queue_var_store64(s64 *var, const char *page)
return 0;
}
-static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
- if (!wbt_rq_qos(q))
+ if (!wbt_rq_qos(disk->queue))
return -EINVAL;
- if (wbt_disabled(q))
+ if (wbt_disabled(disk->queue))
return sprintf(page, "0\n");
- return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+ return sprintf(page, "%llu\n",
+ div_u64(wbt_get_min_lat(disk->queue), 1000));
}
-static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
size_t count)
{
+ struct request_queue *q = disk->queue;
struct rq_qos *rqos;
ssize_t ret;
s64 val;
@@ -567,7 +514,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
rqos = wbt_rq_qos(q);
if (!rqos) {
- ret = wbt_init(q->disk);
+ ret = wbt_init(disk);
if (ret)
return ret;
}
@@ -585,13 +532,11 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
* ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens.
*/
- blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
wbt_set_min_lat(q, val);
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
return count;
}
@@ -615,14 +560,18 @@ static struct attribute *queue_attrs[] = {
&queue_io_min_entry.attr,
&queue_io_opt_entry.attr,
&queue_discard_granularity_entry.attr,
- &queue_discard_max_entry.attr,
- &queue_discard_max_hw_entry.attr,
+ &queue_max_discard_sectors_entry.attr,
+ &queue_max_hw_discard_sectors_entry.attr,
&queue_discard_zeroes_data_entry.attr,
+ &queue_atomic_write_max_sectors_entry.attr,
+ &queue_atomic_write_boundary_sectors_entry.attr,
+ &queue_atomic_write_unit_min_entry.attr,
+ &queue_atomic_write_unit_max_entry.attr,
&queue_write_same_max_entry.attr,
- &queue_write_zeroes_max_entry.attr,
+ &queue_max_write_zeroes_sectors_entry.attr,
&queue_zone_append_max_entry.attr,
&queue_zone_write_granularity_entry.attr,
- &queue_nonrot_entry.attr,
+ &queue_rotational_entry.attr,
&queue_zoned_entry.attr,
&queue_nr_zones_entry.attr,
&queue_max_open_zones_entry.attr,
@@ -630,7 +579,7 @@ static struct attribute *queue_attrs[] = {
&queue_nomerges_entry.attr,
&queue_iostats_entry.attr,
&queue_stable_writes_entry.attr,
- &queue_random_entry.attr,
+ &queue_add_random_entry.attr,
&queue_poll_entry.attr,
&queue_wc_entry.attr,
&queue_fua_entry.attr,
@@ -699,14 +648,13 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
- struct request_queue *q = disk->queue;
ssize_t res;
if (!entry->show)
return -EIO;
- mutex_lock(&q->sysfs_lock);
- res = entry->show(q, page);
- mutex_unlock(&q->sysfs_lock);
+ mutex_lock(&disk->queue->sysfs_lock);
+ res = entry->show(disk, page);
+ mutex_unlock(&disk->queue->sysfs_lock);
return res;
}
@@ -722,9 +670,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;
+ blk_mq_freeze_queue(q);
mutex_lock(&q->sysfs_lock);
- res = entry->store(q, page, length);
+ res = entry->store(disk, page, length);
mutex_unlock(&q->sysfs_lock);
+ blk_mq_unfreeze_queue(q);
return res;
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c1bf73f8c75d..dc6140fa3de0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -704,6 +704,9 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
/* Calc approx time to dispatch */
jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
+
+ /* make sure at least one I/O can be dispatched after waiting */
+ jiffy_wait = max(jiffy_wait, HZ / iops_limit + 1);
return jiffy_wait;
}
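
A worked example of the new floor (runnable userspace C; HZ and the limit are illustrative):

#include <stdio.h>

#define HZ 1000 /* assumed tick rate for the example */

int main(void)
{
	unsigned int iops_limit = 100;
	unsigned long jiffy_wait = 3; /* hypothetical computed wait */

	/* floor the wait so at least one I/O fits in the window:
	 * HZ / iops_limit + 1 = 11 jiffies at 100 IOPS */
	unsigned long floor = HZ / iops_limit + 1;

	if (jiffy_wait < floor)
		jiffy_wait = floor;
	printf("%lu\n", jiffy_wait); /* prints 11 */
	return 0;
}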
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 64472134dd26..6dfc659d22e2 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -37,7 +37,7 @@
enum wbt_flags {
WBT_TRACKED = 1, /* write, tracked for throttling */
WBT_READ = 2, /* read */
- WBT_KSWAPD = 4, /* write, from kswapd */
+ WBT_SWAP = 4, /* write, from swap_writepage() */
WBT_DISCARD = 8, /* discard */
WBT_NR_BITS = 4, /* number of bits */
@@ -45,7 +45,7 @@ enum wbt_flags {
enum {
WBT_RWQ_BG = 0,
- WBT_RWQ_KSWAPD,
+ WBT_RWQ_SWAP,
WBT_RWQ_DISCARD,
WBT_NUM_RWQ,
};
@@ -172,8 +172,8 @@ static bool wb_recent_wait(struct rq_wb *rwb)
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
enum wbt_flags wb_acct)
{
- if (wb_acct & WBT_KSWAPD)
- return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+ if (wb_acct & WBT_SWAP)
+ return &rwb->rq_wait[WBT_RWQ_SWAP];
else if (wb_acct & WBT_DISCARD)
return &rwb->rq_wait[WBT_RWQ_DISCARD];
@@ -206,8 +206,8 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
*/
if (wb_acct & WBT_DISCARD)
limit = rwb->wb_background;
- else if (test_bit(QUEUE_FLAG_WC, &rwb->rqos.disk->queue->queue_flags) &&
- !wb_recent_wait(rwb))
+ else if (blk_queue_write_cache(rwb->rqos.disk->queue) &&
+ !wb_recent_wait(rwb))
limit = 0;
else
limit = rwb->wb_normal;
@@ -528,7 +528,7 @@ static bool close_io(struct rq_wb *rwb)
time_before(now, rwb->last_comp + HZ / 10);
}
-#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO)
+#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)
static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{
@@ -539,13 +539,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
/*
* At this point we know it's a buffered write. If this is
- * kswapd trying to free memory, or REQ_SYNC is set, then
+ * swap trying to free memory, or REQ_SYNC is set, then
* it's WB_SYNC_ALL writeback, and we'll use the max limit for
* that. If the write is marked as a background write, then use
* the idle limit, or go to normal if we haven't had competing
* IO for a bit.
*/
- if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+ if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
limit = rwb->rq_depth.max_depth;
else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
/*
@@ -622,8 +622,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
if (bio_op(bio) == REQ_OP_READ) {
flags = WBT_READ;
} else if (wbt_should_throttle(bio)) {
- if (current_is_kswapd())
- flags |= WBT_KSWAPD;
+ if (bio->bi_opf & REQ_SWAP)
+ flags |= WBT_SWAP;
if (bio_op(bio) == REQ_OP_DISCARD)
flags |= WBT_DISCARD;
flags |= WBT_TRACKED;
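
With this hunk, swap-out writes are identified by REQ_SWAP on the bio itself rather than by whoever happens to submit them (kswapd or otherwise). A hedged sketch of the resulting classification, with illustrative stand-ins for the kernel's flag values:

#include <stdio.h>

enum { WBT_TRACKED = 1, WBT_READ = 2, WBT_SWAP = 4, WBT_DISCARD = 8 };
enum { OP_READ, OP_WRITE, OP_DISCARD };
#define REQ_SWAP 0x100	/* stand-in value, not the kernel's */

static unsigned int classify(int op, unsigned int opf)
{
	unsigned int flags = 0;

	if (op == OP_READ)
		return WBT_READ;
	/* A throttled write: tag swap I/O by the bio flag alone. */
	if (opf & REQ_SWAP)
		flags |= WBT_SWAP;
	if (op == OP_DISCARD)
		flags |= WBT_DISCARD;
	return flags | WBT_TRACKED;
}

int main(void)
{
	printf("%u\n", classify(OP_WRITE, REQ_SWAP));	/* 5: SWAP | TRACKED */
	return 0;
}
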
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 08d7dfe8bd93..af19296fa50d 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -116,24 +116,6 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
/**
- * bdev_nr_zones - Get number of zones
- * @bdev: Target device
- *
- * Return the total number of zones of a zoned block device. For a block
- * device without zone capabilities, the number of zones is always 0.
- */
-unsigned int bdev_nr_zones(struct block_device *bdev)
-{
- sector_t zone_sectors = bdev_zone_sectors(bdev);
-
- if (!bdev_is_zoned(bdev))
- return 0;
- return (bdev_nr_sectors(bdev) + zone_sectors - 1) >>
- ilog2(zone_sectors);
-}
-EXPORT_SYMBOL_GPL(bdev_nr_zones);
-
-/**
* blkdev_report_zones - Get zones information
* @bdev: Target block device
* @sector: Sector from which to report zones
@@ -168,77 +150,6 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
- unsigned int nr_zones)
-{
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
- GFP_NOIO, node);
-}
-
-static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- /*
- * For an all-zones reset, ignore conventional, empty, read-only
- * and offline zones.
- */
- switch (zone->cond) {
- case BLK_ZONE_COND_NOT_WP:
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_READONLY:
- case BLK_ZONE_COND_OFFLINE:
- return 0;
- default:
- set_bit(idx, (unsigned long *)data);
- return 0;
- }
-}
-
-static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
-{
- struct gendisk *disk = bdev->bd_disk;
- sector_t capacity = bdev_nr_sectors(bdev);
- sector_t zone_sectors = bdev_zone_sectors(bdev);
- unsigned long *need_reset;
- struct bio *bio = NULL;
- sector_t sector = 0;
- int ret;
-
- need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
- if (!need_reset)
- return -ENOMEM;
-
- ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
- blk_zone_need_reset_cb, need_reset);
- if (ret < 0)
- goto out_free_need_reset;
-
- ret = 0;
- while (sector < capacity) {
- if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
- sector += zone_sectors;
- continue;
- }
-
- bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
- GFP_KERNEL);
- bio->bi_iter.bi_sector = sector;
- sector += zone_sectors;
-
- /* This may take a while, so be nice to others */
- cond_resched();
- }
-
- if (bio) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
- }
-
-out_free_need_reset:
- kfree(need_reset);
- return ret;
-}
-
static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;
@@ -265,7 +176,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev)
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sector, sector_t nr_sectors)
{
- struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
sector_t capacity = bdev_nr_sectors(bdev);
sector_t end_sector = sector + nr_sectors;
@@ -293,16 +203,11 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
return -EINVAL;
/*
- * In the case of a zone reset operation over all zones,
- * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
- * command. For other devices, we emulate this command behavior by
- * identifying the zones needing a reset.
+ * In the case of a zone reset operation over all zones, use
+ * REQ_OP_ZONE_RESET_ALL.
*/
- if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
- if (!blk_queue_zone_resetall(q))
- return blkdev_zone_reset_all_emulated(bdev);
+ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
return blkdev_zone_reset_all(bdev);
- }
while (sector < end_sector) {
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
@@ -1573,7 +1478,7 @@ void disk_free_zone_resources(struct gendisk *disk)
mempool_destroy(disk->zone_wplugs_pool);
disk->zone_wplugs_pool = NULL;
- kfree(disk->conv_zones_bitmap);
+ bitmap_free(disk->conv_zones_bitmap);
disk->conv_zones_bitmap = NULL;
disk->zone_capacity = 0;
disk->last_zone_capacity = 0;
@@ -1650,8 +1555,22 @@ static int disk_update_zone_resources(struct gendisk *disk,
return -ENODEV;
}
+ lim = queue_limits_start_update(q);
+
+ /*
+ * Some devices can advertise zone resource limits that are larger than
+ * the number of sequential zones of the zoned block device, e.g. a
+ * small ZNS namespace. In that case, assume that the zoned device has
+ * no zone resource limits.
+ */
+ nr_seq_zones = disk->nr_zones - nr_conv_zones;
+ if (lim.max_open_zones >= nr_seq_zones)
+ lim.max_open_zones = 0;
+ if (lim.max_active_zones >= nr_seq_zones)
+ lim.max_active_zones = 0;
+
if (!disk->zone_wplugs_pool)
- return 0;
+ goto commit;
/*
* If the device has no limit on the maximum number of open and active
@@ -1660,9 +1579,6 @@ static int disk_update_zone_resources(struct gendisk *disk,
* dynamic zone write plug allocation when simultaneously writing to
* more zones than the size of the mempool.
*/
- lim = queue_limits_start_update(q);
-
- nr_seq_zones = disk->nr_zones - nr_conv_zones;
pool_size = max(lim.max_open_zones, lim.max_active_zones);
if (!pool_size)
pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones);
@@ -1676,6 +1592,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
lim.max_open_zones = 0;
}
+commit:
return queue_limits_commit_update(q, &lim);
}
@@ -1683,7 +1600,6 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
struct blk_revalidate_zone_args *args)
{
struct gendisk *disk = args->disk;
- struct request_queue *q = disk->queue;
if (zone->capacity != zone->len) {
pr_warn("%s: Invalid conventional zone capacity\n",
@@ -1699,7 +1615,7 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
if (!args->conv_zones_bitmap) {
args->conv_zones_bitmap =
- blk_alloc_zone_bitmap(q->node, args->nr_zones);
+ bitmap_zalloc(args->nr_zones, GFP_NOIO);
if (!args->conv_zones_bitmap)
return -ENOMEM;
}
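
The sanitization added above boils down to one comparison; a sketch of the rule under illustrative names (not the kernel's API):

#include <stdio.h>

static unsigned int sanitize_zone_limit(unsigned int limit,
					unsigned int nr_seq_zones)
{
	/* A limit >= the sequential zone count conveys no information,
	 * so report it as "no limit" (0). */
	return limit >= nr_seq_zones ? 0 : limit;
}

int main(void)
{
	/* e.g. a small ZNS namespace with 8 sequential zones but
	 * max_open_zones advertised as 16 */
	printf("%u\n", sanitize_zone_limit(16, 8));	/* 0 */
	printf("%u\n", sanitize_zone_limit(4, 8));	/* 4 */
	return 0;
}
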
diff --git a/block/blk.h b/block/blk.h
index 189bc25beb50..8e8936e97307 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -98,8 +98,8 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
unsigned long mask = queue_segment_boundary(q);
- phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
- phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
+ phys_addr_t addr1 = bvec_phys(vec1);
+ phys_addr_t addr2 = bvec_phys(vec2);
/*
* Merging adjacent physical pages may not work correctly under KMSAN
@@ -181,9 +181,11 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
return queue_max_segments(rq->q);
}
-static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
- enum req_op op)
+static inline unsigned int blk_queue_get_max_sectors(struct request *rq)
{
+ struct request_queue *q = rq->q;
+ enum req_op op = req_op(rq);
+
if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
return min(q->limits.max_discard_sectors,
UINT_MAX >> SECTOR_SHIFT);
@@ -191,6 +193,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(op == REQ_OP_WRITE_ZEROES))
return q->limits.max_write_zeroes_sectors;
+ if (rq->cmd_flags & REQ_ATOMIC)
+ return q->limits.atomic_write_max_sectors;
+
return q->limits.max_sectors;
}
@@ -352,6 +357,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
int blk_set_default_limits(struct queue_limits *lim);
+void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+ struct queue_limits *lim);
int blk_dev_init(void);
/*
@@ -393,7 +400,7 @@ struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
static inline bool blk_queue_may_bounce(struct request_queue *q)
{
return IS_ENABLED(CONFIG_BOUNCE) &&
- q->limits.bounce == BLK_BOUNCE_HIGH &&
+ (q->limits.features & BLK_FEAT_BOUNCE_HIGH) &&
max_low_pfn >= max_pfn;
}
@@ -673,4 +680,9 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops, struct file *bdev_file);
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
+void blk_integrity_generate(struct bio *bio);
+void blk_integrity_verify(struct bio *bio);
+void blk_integrity_prepare(struct request *rq);
+void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/elevator.c b/block/elevator.c
index f64ebd726e58..f13d552a32c8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -709,24 +709,25 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return ret;
}
-ssize_t elv_iosched_store(struct request_queue *q, const char *buf,
+ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
int ret;
- if (!elv_support_iosched(q))
+ if (!elv_support_iosched(disk->queue))
return count;
strscpy(elevator_name, buf, sizeof(elevator_name));
- ret = elevator_change(q, strstrip(elevator_name));
+ ret = elevator_change(disk->queue, strstrip(elevator_name));
if (!ret)
return count;
return ret;
}
-ssize_t elv_iosched_show(struct request_queue *q, char *name)
+ssize_t elv_iosched_show(struct gendisk *disk, char *name)
{
+ struct request_queue *q = disk->queue;
struct elevator_queue *eq = q->elevator;
struct elevator_type *cur = NULL, *e;
int len = 0;
diff --git a/block/elevator.h b/block/elevator.h
index e9a050a96e53..3fe18e1a8692 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -147,8 +147,8 @@ extern void elv_unregister(struct elevator_type *);
/*
* io scheduler sysfs switching
*/
-extern ssize_t elv_iosched_show(struct request_queue *, char *);
-extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
+ssize_t elv_iosched_show(struct gendisk *disk, char *page);
+ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,
diff --git a/block/fops.c b/block/fops.c
index 376265935714..9825c1713a49 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -34,9 +34,12 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
return opf;
}
-static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
- struct iov_iter *iter)
+static bool blkdev_dio_invalid(struct block_device *bdev, loff_t pos,
+ struct iov_iter *iter, bool is_atomic)
{
+ if (is_atomic && !generic_atomic_write_valid(iter, pos))
+ return true;
+
return pos & (bdev_logical_block_size(bdev) - 1) ||
!bdev_iter_is_aligned(bdev, iter);
}
@@ -72,6 +75,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio.bi_ioprio = iocb->ki_ioprio;
+ if (iocb->ki_flags & IOCB_ATOMIC)
+ bio.bi_opf |= REQ_ATOMIC;
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
@@ -343,6 +348,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
task_io_account_write(bio->bi_iter.bi_size);
}
+ if (iocb->ki_flags & IOCB_ATOMIC)
+ bio->bi_opf |= REQ_ATOMIC;
+
if (iocb->ki_flags & IOCB_NOWAIT)
bio->bi_opf |= REQ_NOWAIT;
@@ -359,12 +367,13 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
+ bool is_atomic = iocb->ki_flags & IOCB_ATOMIC;
unsigned int nr_pages;
if (!iov_iter_count(iter))
return 0;
- if (blkdev_dio_unaligned(bdev, iocb->ki_pos, iter))
+ if (blkdev_dio_invalid(bdev, iocb->ki_pos, iter, is_atomic))
return -EINVAL;
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
@@ -373,6 +382,8 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return __blkdev_direct_IO_simple(iocb, iter, bdev,
nr_pages);
return __blkdev_direct_IO_async(iocb, iter, bdev, nr_pages);
+ } else if (is_atomic) {
+ return -EINVAL;
}
return __blkdev_direct_IO(iocb, iter, bdev, bio_max_segs(nr_pages));
}
@@ -383,10 +394,11 @@ static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct block_device *bdev = I_BDEV(inode);
loff_t isize = i_size_read(inode);
- iomap->bdev = bdev;
- iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
if (offset >= isize)
return -EIO;
+
+ iomap->bdev = bdev;
+ iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
iomap->type = IOMAP_MAPPED;
iomap->addr = iomap->offset;
iomap->length = isize - iomap->offset;
@@ -612,6 +624,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (!bdev)
return -ENXIO;
+ if (bdev_can_atomic_write(bdev) && filp->f_flags & O_DIRECT)
+ filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
+
ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
if (ret)
blkdev_put_no_open(bdev);
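
Together these fops.c hunks gate atomic writes on direct I/O: blkdev_open() grants FMODE_CAN_ATOMIC_WRITE only for O_DIRECT opens of capable devices, and blkdev_dio_invalid() rejects atomic requests that fail generic_atomic_write_valid(). A hedged userspace sketch of exercising the path with pwritev2() and RWF_ATOMIC (the device path, the alignment, and the flag's availability in the headers are assumptions):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef RWF_ATOMIC
#define RWF_ATOMIC 0x00000040	/* assumption: value from recent uapi headers */
#endif

int main(void)
{
	int fd = open("/dev/nvme0n1", O_WRONLY | O_DIRECT); /* illustrative device */
	struct iovec iov;
	void *buf;

	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;
	iov.iov_base = buf;
	iov.iov_len = 4096;	/* must satisfy the device's atomic write limits */

	/* All-or-nothing write; a misaligned position or length would be
	 * rejected with EINVAL by the new blkdev_dio_invalid() check. */
	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) < 0)
		return 1;
	return 0;
}
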
diff --git a/block/genhd.c b/block/genhd.c
index 8f1f3c6b4d67..4dc95a463505 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -524,7 +524,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
}
- disk_update_readahead(disk);
+ blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
disk_add_events(disk);
set_bit(GD_ADDED, &disk->state);
return 0;
diff --git a/block/ioctl.c b/block/ioctl.c
index d570e1695896..e8e4a4190f18 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -224,7 +224,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
goto fail;
err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
- BLKDEV_ZERO_NOUNMAP);
+ BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE);
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 94eede4fb9eb..acdc28756d9d 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -488,6 +488,20 @@ unlock:
}
/*
+ * 'depth' is a number in the range 1..INT_MAX representing a number of
+ * requests. Scale it by a factor of (1 << bt->sb.shift) / q->nr_requests, since
+ * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
+ * Values larger than q->nr_requests have the same effect as q->nr_requests.
+ */
+static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
+{
+ struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
+ const unsigned int nrr = hctx->queue->nr_requests;
+
+ return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
+}
+
+/*
* Called by __blk_mq_alloc_request(). The shallow_depth value set by this
* function is used by __blk_mq_get_tag().
*/
@@ -503,7 +517,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
* Throttle asynchronous requests and writes such that these requests
* do not block the allocation of synchronous requests.
*/
- data->shallow_depth = dd->async_depth;
+ data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
}
/* Called by blk_mq_update_nr_requests(). */
@@ -513,9 +527,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
struct deadline_data *dd = q->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;
- dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
+ dd->async_depth = q->nr_requests;
- sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
+ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
}
/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
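
The scaling comment is easy to sanity-check with concrete numbers; a standalone sketch mirroring dd_to_word_depth() (names are illustrative). With nr_requests = 256 and a sbitmap word shift of 6 (64 bits per word), the old default async depth of three quarters of nr_requests, 192, maps to 48 of the 64 per-word slots:

#include <stdio.h>

static int dd_to_word_depth_sketch(unsigned int qdepth, unsigned int shift,
				   unsigned int nr_requests)
{
	/* Round up so a nonzero qdepth never collapses to zero slots. */
	return ((qdepth << shift) + nr_requests - 1) / nr_requests;
}

int main(void)
{
	printf("%d\n", dd_to_word_depth_sketch(192, 6, 256));	/* 48 */
	return 0;
}
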
diff --git a/block/t10-pi.c b/block/t10-pi.c
index f4cc91156da1..425e2836f3e1 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -11,17 +11,23 @@
#include <linux/module.h>
#include <net/checksum.h>
#include <asm/unaligned.h>
+#include "blk.h"
+
+struct blk_integrity_iter {
+ void *prot_buf;
+ void *data_buf;
+ sector_t seed;
+ unsigned int data_size;
+ unsigned short interval;
+ const char *disk_name;
+};
-typedef __be16 (csum_fn) (__be16, void *, unsigned int);
-
-static __be16 t10_pi_crc_fn(__be16 crc, void *data, unsigned int len)
-{
- return cpu_to_be16(crc_t10dif_update(be16_to_cpu(crc), data, len));
-}
-
-static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len)
+static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
+ unsigned char csum_type)
{
- return (__force __be16)ip_compute_csum(data, len);
+ if (csum_type == BLK_INTEGRITY_CSUM_IP)
+ return (__force __be16)ip_compute_csum(data, len);
+ return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len));
}
/*
@@ -29,48 +35,44 @@ static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len)
* 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
* tag.
*/
-static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
- csum_fn *fn, enum t10_dif_type type)
+static void t10_pi_generate(struct blk_integrity_iter *iter,
+ struct blk_integrity *bi)
{
- u8 offset = iter->pi_offset;
+ u8 offset = bi->pi_offset;
unsigned int i;
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
struct t10_pi_tuple *pi = iter->prot_buf + offset;
- pi->guard_tag = fn(0, iter->data_buf, iter->interval);
+ pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval,
+ bi->csum_type);
if (offset)
- pi->guard_tag = fn(pi->guard_tag, iter->prot_buf,
- offset);
+ pi->guard_tag = t10_pi_csum(pi->guard_tag,
+ iter->prot_buf, offset, bi->csum_type);
pi->app_tag = 0;
- if (type == T10_PI_TYPE1_PROTECTION)
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
else
pi->ref_tag = 0;
iter->data_buf += iter->interval;
- iter->prot_buf += iter->tuple_size;
+ iter->prot_buf += bi->tuple_size;
iter->seed++;
}
-
- return BLK_STS_OK;
}
static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
- csum_fn *fn, enum t10_dif_type type)
+ struct blk_integrity *bi)
{
- u8 offset = iter->pi_offset;
+ u8 offset = bi->pi_offset;
unsigned int i;
- BUG_ON(type == T10_PI_TYPE0_PROTECTION);
-
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
struct t10_pi_tuple *pi = iter->prot_buf + offset;
__be16 csum;
- if (type == T10_PI_TYPE1_PROTECTION ||
- type == T10_PI_TYPE2_PROTECTION) {
+ if (bi->flags & BLK_INTEGRITY_REF_TAG) {
if (pi->app_tag == T10_PI_APP_ESCAPE)
goto next;
@@ -82,15 +84,17 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
iter->seed, be32_to_cpu(pi->ref_tag));
return BLK_STS_PROTECTION;
}
- } else if (type == T10_PI_TYPE3_PROTECTION) {
+ } else {
if (pi->app_tag == T10_PI_APP_ESCAPE &&
pi->ref_tag == T10_PI_REF_ESCAPE)
goto next;
}
- csum = fn(0, iter->data_buf, iter->interval);
+ csum = t10_pi_csum(0, iter->data_buf, iter->interval,
+ bi->csum_type);
if (offset)
- csum = fn(csum, iter->prot_buf, offset);
+ csum = t10_pi_csum(csum, iter->prot_buf, offset,
+ bi->csum_type);
if (pi->guard_tag != csum) {
pr_err("%s: guard tag error at sector %llu " \
@@ -102,33 +106,13 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
next:
iter->data_buf += iter->interval;
- iter->prot_buf += iter->tuple_size;
+ iter->prot_buf += bi->tuple_size;
iter->seed++;
}
return BLK_STS_OK;
}
-static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
-{
- return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
-}
-
-static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
-{
- return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
-}
-
-static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
-{
- return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
-}
-
-static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
-{
- return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
-}
-
/**
* t10_pi_type1_prepare - prepare PI prior submitting request to device
* @rq: request with PI that should be prepared
@@ -141,7 +125,7 @@ static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
*/
static void t10_pi_type1_prepare(struct request *rq)
{
- struct blk_integrity *bi = &rq->q->integrity;
+ struct blk_integrity *bi = &rq->q->limits.integrity;
const int tuple_sz = bi->tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
@@ -192,7 +176,7 @@ static void t10_pi_type1_prepare(struct request *rq)
*/
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
- struct blk_integrity *bi = &rq->q->integrity;
+ struct blk_integrity *bi = &rq->q->limits.integrity;
unsigned intervals = nr_bytes >> bi->interval_exp;
const int tuple_sz = bi->tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
@@ -225,81 +209,15 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
}
}
-static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
-{
- return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
-{
- return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
-{
- return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
-{
- return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-/* Type 3 does not have a reference tag so no remapping is required. */
-static void t10_pi_type3_prepare(struct request *rq)
-{
-}
-
-/* Type 3 does not have a reference tag so no remapping is required. */
-static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
-{
-}
-
-const struct blk_integrity_profile t10_pi_type1_crc = {
- .name = "T10-DIF-TYPE1-CRC",
- .generate_fn = t10_pi_type1_generate_crc,
- .verify_fn = t10_pi_type1_verify_crc,
- .prepare_fn = t10_pi_type1_prepare,
- .complete_fn = t10_pi_type1_complete,
-};
-EXPORT_SYMBOL(t10_pi_type1_crc);
-
-const struct blk_integrity_profile t10_pi_type1_ip = {
- .name = "T10-DIF-TYPE1-IP",
- .generate_fn = t10_pi_type1_generate_ip,
- .verify_fn = t10_pi_type1_verify_ip,
- .prepare_fn = t10_pi_type1_prepare,
- .complete_fn = t10_pi_type1_complete,
-};
-EXPORT_SYMBOL(t10_pi_type1_ip);
-
-const struct blk_integrity_profile t10_pi_type3_crc = {
- .name = "T10-DIF-TYPE3-CRC",
- .generate_fn = t10_pi_type3_generate_crc,
- .verify_fn = t10_pi_type3_verify_crc,
- .prepare_fn = t10_pi_type3_prepare,
- .complete_fn = t10_pi_type3_complete,
-};
-EXPORT_SYMBOL(t10_pi_type3_crc);
-
-const struct blk_integrity_profile t10_pi_type3_ip = {
- .name = "T10-DIF-TYPE3-IP",
- .generate_fn = t10_pi_type3_generate_ip,
- .verify_fn = t10_pi_type3_verify_ip,
- .prepare_fn = t10_pi_type3_prepare,
- .complete_fn = t10_pi_type3_complete,
-};
-EXPORT_SYMBOL(t10_pi_type3_ip);
-
static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
{
return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
}
-static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter,
- enum t10_dif_type type)
+static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
+ struct blk_integrity *bi)
{
- u8 offset = iter->pi_offset;
+ u8 offset = bi->pi_offset;
unsigned int i;
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
@@ -311,17 +229,15 @@ static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter,
iter->prot_buf, offset);
pi->app_tag = 0;
- if (type == T10_PI_TYPE1_PROTECTION)
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
put_unaligned_be48(iter->seed, pi->ref_tag);
else
put_unaligned_be48(0ULL, pi->ref_tag);
iter->data_buf += iter->interval;
- iter->prot_buf += iter->tuple_size;
+ iter->prot_buf += bi->tuple_size;
iter->seed++;
}
-
- return BLK_STS_OK;
}
static bool ext_pi_ref_escape(u8 *ref_tag)
@@ -332,9 +248,9 @@ static bool ext_pi_ref_escape(u8 *ref_tag)
}
static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
- enum t10_dif_type type)
+ struct blk_integrity *bi)
{
- u8 offset = iter->pi_offset;
+ u8 offset = bi->pi_offset;
unsigned int i;
for (i = 0; i < iter->data_size; i += iter->interval) {
@@ -342,7 +258,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
u64 ref, seed;
__be64 csum;
- if (type == T10_PI_TYPE1_PROTECTION) {
+ if (bi->flags & BLK_INTEGRITY_REF_TAG) {
if (pi->app_tag == T10_PI_APP_ESCAPE)
goto next;
@@ -353,7 +269,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
iter->disk_name, seed, ref);
return BLK_STS_PROTECTION;
}
- } else if (type == T10_PI_TYPE3_PROTECTION) {
+ } else {
if (pi->app_tag == T10_PI_APP_ESCAPE &&
ext_pi_ref_escape(pi->ref_tag))
goto next;
@@ -374,26 +290,16 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
next:
iter->data_buf += iter->interval;
- iter->prot_buf += iter->tuple_size;
+ iter->prot_buf += bi->tuple_size;
iter->seed++;
}
return BLK_STS_OK;
}
-static blk_status_t ext_pi_type1_verify_crc64(struct blk_integrity_iter *iter)
-{
- return ext_pi_crc64_verify(iter, T10_PI_TYPE1_PROTECTION);
-}
-
-static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter)
-{
- return ext_pi_crc64_generate(iter, T10_PI_TYPE1_PROTECTION);
-}
-
static void ext_pi_type1_prepare(struct request *rq)
{
- struct blk_integrity *bi = &rq->q->integrity;
+ struct blk_integrity *bi = &rq->q->limits.integrity;
const int tuple_sz = bi->tuple_size;
u64 ref_tag = ext_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
@@ -433,7 +339,7 @@ static void ext_pi_type1_prepare(struct request *rq)
static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
- struct blk_integrity *bi = &rq->q->integrity;
+ struct blk_integrity *bi = &rq->q->limits.integrity;
unsigned intervals = nr_bytes >> bi->interval_exp;
const int tuple_sz = bi->tuple_size;
u64 ref_tag = ext_pi_ref_tag(rq);
@@ -467,33 +373,105 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
}
}
-static blk_status_t ext_pi_type3_verify_crc64(struct blk_integrity_iter *iter)
+void blk_integrity_generate(struct bio *bio)
{
- return ext_pi_crc64_verify(iter, T10_PI_TYPE3_PROTECTION);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity_iter iter;
+ struct bvec_iter bviter;
+ struct bio_vec bv;
+
+ iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ iter.interval = 1 << bi->interval_exp;
+ iter.seed = bio->bi_iter.bi_sector;
+ iter.prot_buf = bvec_virt(bip->bip_vec);
+ bio_for_each_segment(bv, bio, bviter) {
+ void *kaddr = bvec_kmap_local(&bv);
+
+ iter.data_buf = kaddr;
+ iter.data_size = bv.bv_len;
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_CRC64:
+ ext_pi_crc64_generate(&iter, bi);
+ break;
+ case BLK_INTEGRITY_CSUM_CRC:
+ case BLK_INTEGRITY_CSUM_IP:
+ t10_pi_generate(&iter, bi);
+ break;
+ default:
+ break;
+ }
+ kunmap_local(kaddr);
+ }
}
-static blk_status_t ext_pi_type3_generate_crc64(struct blk_integrity_iter *iter)
+void blk_integrity_verify(struct bio *bio)
{
- return ext_pi_crc64_generate(iter, T10_PI_TYPE3_PROTECTION);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity_iter iter;
+ struct bvec_iter bviter;
+ struct bio_vec bv;
+
+ /*
+ * At the moment verify is called, bi_iter has been advanced during split
+ * and completion, so use the copy created during submission here.
+ */
+ iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ iter.interval = 1 << bi->interval_exp;
+ iter.seed = bip->bio_iter.bi_sector;
+ iter.prot_buf = bvec_virt(bip->bip_vec);
+ __bio_for_each_segment(bv, bio, bviter, bip->bio_iter) {
+ void *kaddr = bvec_kmap_local(&bv);
+ blk_status_t ret = BLK_STS_OK;
+
+ iter.data_buf = kaddr;
+ iter.data_size = bv.bv_len;
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_CRC64:
+ ret = ext_pi_crc64_verify(&iter, bi);
+ break;
+ case BLK_INTEGRITY_CSUM_CRC:
+ case BLK_INTEGRITY_CSUM_IP:
+ ret = t10_pi_verify(&iter, bi);
+ break;
+ default:
+ break;
+ }
+ kunmap_local(kaddr);
+
+ if (ret) {
+ bio->bi_status = ret;
+ return;
+ }
+ }
}
-const struct blk_integrity_profile ext_pi_type1_crc64 = {
- .name = "EXT-DIF-TYPE1-CRC64",
- .generate_fn = ext_pi_type1_generate_crc64,
- .verify_fn = ext_pi_type1_verify_crc64,
- .prepare_fn = ext_pi_type1_prepare,
- .complete_fn = ext_pi_type1_complete,
-};
-EXPORT_SYMBOL_GPL(ext_pi_type1_crc64);
-
-const struct blk_integrity_profile ext_pi_type3_crc64 = {
- .name = "EXT-DIF-TYPE3-CRC64",
- .generate_fn = ext_pi_type3_generate_crc64,
- .verify_fn = ext_pi_type3_verify_crc64,
- .prepare_fn = t10_pi_type3_prepare,
- .complete_fn = t10_pi_type3_complete,
-};
-EXPORT_SYMBOL_GPL(ext_pi_type3_crc64);
+void blk_integrity_prepare(struct request *rq)
+{
+ struct blk_integrity *bi = &rq->q->limits.integrity;
+
+ if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
+ return;
+
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
+ ext_pi_type1_prepare(rq);
+ else
+ t10_pi_type1_prepare(rq);
+}
+
+void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
+{
+ struct blk_integrity *bi = &rq->q->limits.integrity;
+
+ if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
+ return;
+
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
+ ext_pi_type1_complete(rq, nr_bytes);
+ else
+ t10_pi_type1_complete(rq, nr_bytes);
+}
MODULE_DESCRIPTION("T10 Protection Information module");
MODULE_LICENSE("GPL");
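
The t10-pi refactor replaces one generate/verify function pair per profile with a single runtime switch on the checksum type. A self-contained sketch of that dispatch, using a bitwise CRC16 with the T10-DIF polynomial 0x8BB7 and a ones'-complement sum as stand-ins for the kernel's crc_t10dif_update() and ip_compute_csum():

#include <stdio.h>
#include <stddef.h>

enum csum_type { CSUM_IP, CSUM_CRC };

/* Bitwise CRC16, polynomial 0x8BB7 (T10-DIF), init 0, no reflection. */
static unsigned short crc_t10dif_sketch(const unsigned char *d, size_t len)
{
	unsigned short crc = 0;

	while (len--) {
		crc ^= (unsigned short)(*d++ << 8);
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8BB7 : crc << 1;
	}
	return crc;
}

/* Ones'-complement sum, as in the IP checksum family. */
static unsigned short ip_csum_sketch(const unsigned char *d, size_t len)
{
	unsigned long sum = 0;

	for (size_t i = 0; i + 1 < len; i += 2)
		sum += (unsigned long)(d[i] << 8) | d[i + 1];
	if (len & 1)
		sum += (unsigned long)d[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short)~sum;
}

/* One dispatcher instead of one generate/verify pair per profile. */
static unsigned short pi_csum(const unsigned char *d, size_t len,
			      enum csum_type t)
{
	return t == CSUM_IP ? ip_csum_sketch(d, len) : crc_t10dif_sketch(d, len);
}

int main(void)
{
	unsigned char sector[512] = { 0 };

	printf("crc=%04x ip=%04x\n", pi_csum(sector, 512, CSUM_CRC),
	       pi_csum(sector, 512, CSUM_IP));
	return 0;
}
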
diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c
index 8907b8bf4267..c49b9f8de723 100644
--- a/drivers/acpi/acpica/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -44,7 +44,6 @@ acpi_ex_system_memory_space_handler(u32 function,
struct acpi_mem_mapping *mm = mem_info->cur_mm;
u32 length;
acpi_size map_length;
- acpi_size page_boundary_map_length;
#ifdef ACPI_MISALIGNMENT_NOT_SUPPORTED
u32 remainder;
#endif
@@ -138,26 +137,8 @@ acpi_ex_system_memory_space_handler(u32 function,
map_length = (acpi_size)
((mem_info->address + mem_info->length) - address);
- /*
- * If mapping the entire remaining portion of the region will cross
- * a page boundary, just map up to the page boundary, do not cross.
- * On some systems, crossing a page boundary while mapping regions
- * can cause warnings if the pages have different attributes
- * due to resource management.
- *
- * This has the added benefit of constraining a single mapping to
- * one page, which is similar to the original code that used a 4k
- * maximum window.
- */
- page_boundary_map_length = (acpi_size)
- (ACPI_ROUND_UP(address, ACPI_DEFAULT_PAGE_SIZE) - address);
- if (page_boundary_map_length == 0) {
- page_boundary_map_length = ACPI_DEFAULT_PAGE_SIZE;
- }
-
- if (map_length > page_boundary_map_length) {
- map_length = page_boundary_map_length;
- }
+ if (map_length > ACPI_DEFAULT_PAGE_SIZE)
+ map_length = ACPI_DEFAULT_PAGE_SIZE;
/* Create a new mapping starting at the address given */
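
The simplification above drops the page-boundary splitting and simply caps each mapping window at one page size; a sketch of the new rule (ACPI_DEFAULT_PAGE_SIZE assumed to be 4096):

#include <stdio.h>

#define DEFAULT_PAGE_SIZE 4096UL	/* stand-in for ACPI_DEFAULT_PAGE_SIZE */

static unsigned long clamp_map_length(unsigned long remaining)
{
	/* Cap each window at one page size; the old boundary-splitting
	 * logic is gone, only the length limit remains. */
	return remaining > DEFAULT_PAGE_SIZE ? DEFAULT_PAGE_SIZE : remaining;
}

int main(void)
{
	printf("%lu %lu\n", clamp_map_length(3 * 4096UL), clamp_map_length(100));
	/* prints: 4096 100 */
	return 0;
}
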
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 2a0e9fc7b74c..601b670356e5 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -302,6 +302,10 @@ void acpi_mipi_check_crs_csi2(acpi_handle handle);
void acpi_mipi_scan_crs_csi2(void);
void acpi_mipi_init_crs_csi2_swnodes(void);
void acpi_mipi_crs_csi2_cleanup(void);
+#ifdef CONFIG_X86
bool acpi_graph_ignore_port(acpi_handle handle);
+#else
+static inline bool acpi_graph_ignore_port(acpi_handle handle) { return false; }
+#endif
#endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/mipi-disco-img.c b/drivers/acpi/mipi-disco-img.c
index d05413a0672a..92b658f92dc0 100644
--- a/drivers/acpi/mipi-disco-img.c
+++ b/drivers/acpi/mipi-disco-img.c
@@ -725,14 +725,20 @@ void acpi_mipi_crs_csi2_cleanup(void)
acpi_mipi_del_crs_csi2(csi2);
}
-static const struct dmi_system_id dmi_ignore_port_nodes[] = {
- {
- .matches = {
- DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS 9315"),
- },
- },
- { }
+#ifdef CONFIG_X86
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+
+/* CPU matches for Dell generations with broken ACPI MIPI DISCO info */
+static const struct x86_cpu_id dell_broken_mipi_disco_cpu_gens[] = {
+ X86_MATCH_VFM(INTEL_TIGERLAKE, NULL),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, NULL),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, NULL),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, NULL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, NULL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, NULL),
+ {}
};
static const char *strnext(const char *s1, const char *s2)
@@ -761,7 +767,10 @@ bool acpi_graph_ignore_port(acpi_handle handle)
static bool dmi_tested, ignore_port;
if (!dmi_tested) {
- ignore_port = dmi_first_match(dmi_ignore_port_nodes);
+ if (dmi_name_in_vendors("Dell Inc.") &&
+ x86_match_cpu(dell_broken_mipi_disco_cpu_gens))
+ ignore_port = true;
+
dmi_tested = true;
}
@@ -794,3 +803,4 @@ out_free:
kfree(orig_path);
return false;
}
+#endif
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index bd6a7857ce05..831fa4a12159 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -16,7 +16,6 @@
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/sched.h> /* need_resched() */
-#include <linux/sort.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
@@ -386,25 +385,24 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
}
-static int acpi_cst_latency_cmp(const void *a, const void *b)
+static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length)
{
- const struct acpi_processor_cx *x = a, *y = b;
+ int i, j, k;
- if (!(x->valid && y->valid))
- return 0;
- if (x->latency > y->latency)
- return 1;
- if (x->latency < y->latency)
- return -1;
- return 0;
-}
-static void acpi_cst_latency_swap(void *a, void *b, int n)
-{
- struct acpi_processor_cx *x = a, *y = b;
+ for (i = 1; i < length; i++) {
+ if (!states[i].valid)
+ continue;
- if (!(x->valid && y->valid))
- return;
- swap(x->latency, y->latency);
+ for (j = i - 1, k = i; j >= 0; j--) {
+ if (!states[j].valid)
+ continue;
+
+ if (states[j].latency > states[k].latency)
+ swap(states[j].latency, states[k].latency);
+
+ k = j;
+ }
+ }
}
static int acpi_processor_power_verify(struct acpi_processor *pr)
@@ -449,10 +447,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
if (buggy_latency) {
pr_notice("FW issue: working around C-state latencies out of order\n");
- sort(&pr->power.states[1], max_cstate,
- sizeof(struct acpi_processor_cx),
- acpi_cst_latency_cmp,
- acpi_cst_latency_swap);
+ acpi_cst_latency_sort(&pr->power.states[1], max_cstate);
}
lapic_timer_propagate_broadcast(pr);
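
The open-coded replacement sorts the latencies of valid C-states into non-decreasing order while leaving invalid slots untouched, something sort()'s element-wise swap callback could not express safely. A standalone sketch with a hypothetical struct mirroring acpi_processor_cx:

#include <stdio.h>

struct cx { int valid; unsigned int latency; };

static void swap_u(unsigned int *a, unsigned int *b)
{
	unsigned int t = *a; *a = *b; *b = t;
}

static void latency_sort(struct cx *states, int length)
{
	int i, j, k;

	for (i = 1; i < length; i++) {
		if (!states[i].valid)
			continue;
		/* Bubble states[i]'s latency down past every valid
		 * predecessor that is larger, skipping invalid slots. */
		for (j = i - 1, k = i; j >= 0; j--) {
			if (!states[j].valid)
				continue;
			if (states[j].latency > states[k].latency)
				swap_u(&states[j].latency, &states[k].latency);
			k = j;
		}
	}
}

int main(void)
{
	struct cx s[] = { {1, 30}, {0, 0}, {1, 10}, {1, 20} };

	latency_sort(s, 4);
	for (int i = 0; i < 4; i++)
		printf("%u ", s[i].latency);	/* 10 0 20 30 */
	printf("\n");
	return 0;
}
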
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 07d66d2c5f0d..fc6fd583faf8 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1735,6 +1735,14 @@ static void ahci_update_initial_lpm_policy(struct ata_port *ap)
if (ap->pflags & ATA_PFLAG_EXTERNAL)
return;
+ /* If no LPM states are supported by the HBA, do not bother with LPM */
+ if ((ap->host->flags & ATA_HOST_NO_PART) &&
+ (ap->host->flags & ATA_HOST_NO_SSC) &&
+ (ap->host->flags & ATA_HOST_NO_DEVSLP)) {
+ ata_port_dbg(ap, "no LPM states supported, not enabling LPM\n");
+ return;
+ }
+
/* user modified policy via module param */
if (mobile_lpm_policy != -1) {
policy = mobile_lpm_policy;
@@ -1967,8 +1975,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map));
host = ata_host_alloc_pinfo(&pdev->dev, ppi, n_ports);
- if (!host)
- return -ENOMEM;
+ if (!host) {
+ rc = -ENOMEM;
+ goto err_rm_sysfs_file;
+ }
host->private_data = hpriv;
if (ahci_init_msi(pdev, n_ports, hpriv) < 0) {
@@ -2023,11 +2033,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* initialize adapter */
rc = ahci_configure_dma_masks(pdev, hpriv);
if (rc)
- return rc;
+ goto err_rm_sysfs_file;
rc = ahci_pci_reset_controller(host);
if (rc)
- return rc;
+ goto err_rm_sysfs_file;
ahci_pci_init_controller(host);
ahci_pci_print_info(host);
@@ -2036,10 +2046,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = ahci_host_activate(host, &ahci_sht);
if (rc)
- return rc;
+ goto err_rm_sysfs_file;
pm_runtime_put_noidle(&pdev->dev);
return 0;
+
+err_rm_sysfs_file:
+ sysfs_remove_file_from_group(&pdev->dev.kobj,
+ &dev_attr_remapped_nvme.attr, NULL);
+ return rc;
}
static void ahci_shutdown_one(struct pci_dev *pdev)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index e1bf8a19b3c8..74b59b78d278 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4137,8 +4137,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "PIONEER BD-RW BDR-205", NULL, ATA_HORKAGE_NOLPM },
/* Crucial devices with broken LPM support */
- { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM },
- { "CT240BX500SSD1", NULL, ATA_HORKAGE_NOLPM },
+ { "CT*0BX*00SSD1", NULL, ATA_HORKAGE_NOLPM },
/* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
{ "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
@@ -5490,6 +5489,18 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
return ap;
}
+void ata_port_free(struct ata_port *ap)
+{
+ if (!ap)
+ return;
+
+ kfree(ap->pmp_link);
+ kfree(ap->slave_link);
+ kfree(ap->ncq_sense_buf);
+ kfree(ap);
+}
+EXPORT_SYMBOL_GPL(ata_port_free);
+
static void ata_devres_release(struct device *gendev, void *res)
{
struct ata_host *host = dev_get_drvdata(gendev);
@@ -5516,12 +5527,7 @@ static void ata_host_release(struct kref *kref)
int i;
for (i = 0; i < host->n_ports; i++) {
- struct ata_port *ap = host->ports[i];
-
- kfree(ap->pmp_link);
- kfree(ap->slave_link);
- kfree(ap->ncq_sense_buf);
- kfree(ap);
+ ata_port_free(host->ports[i]);
host->ports[i] = NULL;
}
kfree(host);
@@ -5571,8 +5577,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
if (!host)
return NULL;
- if (!devres_open_group(dev, NULL, GFP_KERNEL))
- goto err_free;
+ if (!devres_open_group(dev, NULL, GFP_KERNEL)) {
+ kfree(host);
+ return NULL;
+ }
dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL);
if (!dr)
@@ -5604,8 +5612,6 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
err_out:
devres_release_group(dev, NULL);
- err_free:
- kfree(host);
return NULL;
}
EXPORT_SYMBOL_GPL(ata_host_alloc);
@@ -5904,7 +5910,7 @@ int ata_host_register(struct ata_host *host, const struct scsi_host_template *sh
* allocation time.
*/
for (i = host->n_ports; host->ports[i]; i++)
- kfree(host->ports[i]);
+ ata_port_free(host->ports[i]);
/* give ports names and add SCSI hosts */
for (i = 0; i < host->n_ports; i++) {
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index bb4d30d377ae..67ce756f8dfc 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1024,7 +1024,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
struct ata_device *dev)
{
- struct request_queue *q = sdev->request_queue;
int depth = 1;
if (!ata_id_has_unload(dev->id))
@@ -1038,7 +1037,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
sdev->sector_size = ATA_SECT_SIZE;
/* set DMA padding */
- blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
+ lim->dma_pad_mask = ATA_DMA_PAD_SZ - 1;
/* make room for appending the drain */
lim->max_segments--;
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 3cb455a32d92..1b85e8bf4ef9 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -816,7 +816,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
/* OHare has issues with non cache aligned DMA on some chipsets */
if (priv->kind == controller_ohare) {
lim->dma_alignment = 31;
- blk_queue_update_dma_pad(sdev->request_queue, 31);
+ lim->dma_pad_mask = 31;
/* Tell the world about it */
ata_dev_info(dev, "OHare alignment limits applied\n");
@@ -831,7 +831,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
if (priv->kind == controller_sh_ata6 || priv->kind == controller_k2_ata6) {
/* Alright, these are bad, apply restrictions */
lim->dma_alignment = 15;
- blk_queue_update_dma_pad(sdev->request_queue, 15);
+ lim->dma_pad_mask = 15;
/* We enable MWI and hack cache line size directly here, this
* is specific to this chipset and not normal values, we happen
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5b9d4aaebb81..ed209f4f2798 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -354,6 +354,15 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.
+config BLK_DEV_RUST_NULL
+ tristate "Rust null block driver (Experimental)"
+ depends on RUST
+ help
+ This is the Rust implementation of the null block driver. For now it
+ is only a minimal stub.
+
+ If unsure, say N.
+
config BLK_DEV_RBD
tristate "Rados block device (RBD)"
depends on INET && BLOCK
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 101612cba303..1105a2d4fdcb 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,6 +9,9 @@
# needed for trace events
ccflags-y += -I$(src)
+obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
+rnull_mod-y := rnull.o
+
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a25414228e47..49ced65bef4c 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -232,6 +232,7 @@ static DEFINE_MUTEX(amiflop_mutex);
static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it doesn't identify */
module_param(fd_def_df0, ulong, 0);
+MODULE_DESCRIPTION("Amiga floppy driver");
MODULE_LICENSE("GPL");
/*
@@ -1776,10 +1777,13 @@ static const struct blk_mq_ops amiflop_mq_ops = {
static int fd_alloc_disk(int drive, int system)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
struct gendisk *disk;
int err;
- disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
+ disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b6dac8cee70f..2028795ec61c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -337,6 +337,7 @@ aoeblk_gdalloc(void *vp)
struct queue_limits lim = {
.max_hw_sectors = aoe_maxsectors,
.io_opt = SZ_2M,
+ .features = BLK_FEAT_ROTATIONAL,
};
ulong flags;
int late = 0;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index cacc4ba942a8..4ba98c6654be 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1992,9 +1992,12 @@ static const struct blk_mq_ops ataflop_mq_ops = {
static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
struct gendisk *disk;
- disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
+ disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
@@ -2197,4 +2200,5 @@ static void __exit atari_floppy_exit(void)
module_init(atari_floppy_init)
module_exit(atari_floppy_exit)
+MODULE_DESCRIPTION("Atari floppy driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 558d8e670566..2fd1ed101748 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -296,6 +296,7 @@ static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
+MODULE_DESCRIPTION("Ram backed block device driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
@@ -335,6 +336,8 @@ static int brd_alloc(int i)
.max_hw_discard_sectors = UINT_MAX,
.max_discard_segments = 1,
.discard_granularity = PAGE_SIZE,
+ .features = BLK_FEAT_SYNCHRONOUS |
+ BLK_FEAT_NOWAIT,
};
list_for_each_entry(brd, &brd_devices, brd_list)
@@ -366,10 +369,6 @@ static int brd_alloc(int i)
strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
- /* Tell the block layer that this is not a rotational device */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 113b441d4d36..f92673f05c7a 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2697,6 +2697,9 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
* connect.
*/
.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
+ .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_ROTATIONAL |
+ BLK_FEAT_STABLE_WRITES,
};
device = minor_to_device(minor);
@@ -2735,9 +2738,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
sprintf(disk->disk_name, "drbd%d", minor);
disk->private_data = device;
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
- blk_queue_write_cache(disk->queue, true, true);
-
device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
goto out_no_io_page;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 25c9d85667f1..3affb538b989 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4516,7 +4516,8 @@ static bool floppy_available(int drive)
static int floppy_alloc_disk(unsigned int drive, unsigned int type)
{
struct queue_limits lim = {
- .max_hw_sectors = 64,
+ .max_hw_sectors = 64,
+ .features = BLK_FEAT_ROTATIONAL,
};
struct gendisk *disk;
@@ -5016,6 +5017,7 @@ module_param(floppy, charp, 0);
module_param(FLOPPY_IRQ, int, 0);
module_param(FLOPPY_DMA, int, 0);
MODULE_AUTHOR("Alain L. Knaff");
+MODULE_DESCRIPTION("Normal floppy disk support");
MODULE_LICENSE("GPL");
/* This doesn't actually get used other than for module information */
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1153721bc7c2..78a7bb28defe 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -211,13 +211,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
if (lo->lo_state == Lo_bound)
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
- if (use_dio) {
- blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ if (use_dio)
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
- } else {
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ else
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
- }
if (lo->lo_state == Lo_bound)
blk_mq_unfreeze_queue(lo->lo_queue);
}
@@ -939,24 +936,6 @@ static void loop_free_idle_workers_timer(struct timer_list *timer)
return loop_free_idle_workers(lo, false);
}
-static void loop_update_rotational(struct loop_device *lo)
-{
- struct file *file = lo->lo_backing_file;
- struct inode *file_inode = file->f_mapping->host;
- struct block_device *file_bdev = file_inode->i_sb->s_bdev;
- struct request_queue *q = lo->lo_queue;
- bool nonrot = true;
-
- /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
- if (file_bdev)
- nonrot = bdev_nonrot(file_bdev);
-
- if (nonrot)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
-}
-
/**
* loop_set_status_from_info - configure device from loop_info
* @lo: struct loop_device to configure
@@ -998,17 +977,40 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize,
- bool update_discard_settings)
+static unsigned short loop_default_blocksize(struct loop_device *lo,
+ struct block_device *backing_bdev)
{
+ /* In case of direct I/O, match underlying block size */
+ if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev)
+ return bdev_logical_block_size(backing_bdev);
+ return SECTOR_SIZE;
+}
+
+static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize)
+{
+ struct file *file = lo->lo_backing_file;
+ struct inode *inode = file->f_mapping->host;
+ struct block_device *backing_bdev = NULL;
struct queue_limits lim;
+ if (S_ISBLK(inode->i_mode))
+ backing_bdev = I_BDEV(inode);
+ else if (inode->i_sb->s_bdev)
+ backing_bdev = inode->i_sb->s_bdev;
+
+ if (!bsize)
+ bsize = loop_default_blocksize(lo, backing_bdev);
+
lim = queue_limits_start_update(lo->lo_queue);
lim.logical_block_size = bsize;
lim.physical_block_size = bsize;
lim.io_min = bsize;
- if (update_discard_settings)
- loop_config_discard(lo, &lim);
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL);
+ if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY))
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (backing_bdev && !bdev_nonrot(backing_bdev))
+ lim.features |= BLK_FEAT_ROTATIONAL;
+ loop_config_discard(lo, &lim);
return queue_limits_commit_update(lo->lo_queue, &lim);
}
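
loop_default_blocksize() captures one rule: direct I/O must honour the backing device's logical block size, while buffered I/O can always fall back to 512-byte sectors. A sketch with illustrative stand-ins:

#include <stdio.h>
#include <stdbool.h>

#define SECTOR_SIZE 512

static unsigned short default_blocksize(bool direct_io, bool has_bdev,
					unsigned short bdev_lbs)
{
	/* Match the backing device only when O_DIRECT is in effect and
	 * a backing block device actually exists (tmpfs has none). */
	if (direct_io && has_bdev)
		return bdev_lbs;
	return SECTOR_SIZE;
}

int main(void)
{
	printf("%u %u\n", default_blocksize(true, true, 4096),
	       default_blocksize(false, true, 4096));	/* 4096 512 */
	return 0;
}
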
@@ -1017,12 +1019,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
const struct loop_config *config)
{
struct file *file = fget(config->fd);
- struct inode *inode;
struct address_space *mapping;
int error;
loff_t size;
bool partscan;
- unsigned short bsize;
bool is_loop;
if (!file)
@@ -1055,19 +1055,12 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
goto out_unlock;
mapping = file->f_mapping;
- inode = mapping->host;
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
error = -EINVAL;
goto out_unlock;
}
- if (config->block_size) {
- error = blk_validate_block_size(config->block_size);
- if (error)
- goto out_unlock;
- }
-
error = loop_set_status_from_info(lo, &config->info);
if (error)
goto out_unlock;
@@ -1098,22 +1091,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
- blk_queue_write_cache(lo->lo_queue, true, false);
-
- if (config->block_size)
- bsize = config->block_size;
- else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
- /* In case of direct I/O, match underlying block size */
- bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
- else
- bsize = 512;
-
- error = loop_reconfigure_limits(lo, bsize, true);
- if (WARN_ON_ONCE(error))
+ error = loop_reconfigure_limits(lo, config->block_size);
+ if (error)
goto out_unlock;
- loop_update_rotational(lo);
loop_update_dio(lo);
loop_sysfs_init(lo);
@@ -1154,22 +1135,12 @@ out_putf:
return error;
}
-static void __loop_clr_fd(struct loop_device *lo, bool release)
+static void __loop_clr_fd(struct loop_device *lo)
{
+ struct queue_limits lim;
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
- blk_queue_write_cache(lo->lo_queue, false, false);
-
- /*
- * Freeze the request queue when unbinding on a live file descriptor and
- * thus an open device. When called from ->release we are guaranteed
- * that there is no I/O in progress already.
- */
- if (!release)
- blk_mq_freeze_queue(lo->lo_queue);
-
spin_lock_irq(&lo->lo_lock);
filp = lo->lo_backing_file;
lo->lo_backing_file = NULL;
@@ -1179,7 +1150,14 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
lo->lo_offset = 0;
lo->lo_sizelimit = 0;
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
- loop_reconfigure_limits(lo, 512, false);
+
+ /* reset the block size to the default */
+ lim = queue_limits_start_update(lo->lo_queue);
+ lim.logical_block_size = SECTOR_SIZE;
+ lim.physical_block_size = SECTOR_SIZE;
+ lim.io_min = SECTOR_SIZE;
+ queue_limits_commit_update(lo->lo_queue, &lim);
+
invalidate_disk(lo->lo_disk);
loop_sysfs_exit(lo);
/* let user-space know about this change */
@@ -1187,8 +1165,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- if (!release)
- blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk);
@@ -1203,11 +1179,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
* must be at least one and it can only become zero when the
* current holder is released.
*/
- if (!release)
- mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- if (!release)
- mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
@@ -1256,24 +1228,16 @@ static int loop_clr_fd(struct loop_device *lo)
return -ENXIO;
}
/*
- * If we've explicitly asked to tear down the loop device,
- * and it has an elevated reference count, set it for auto-teardown when
- * the last reference goes away. This stops $!~#$@ udev from
- * preventing teardown because it decided that it needs to run blkid on
- * the loopback device whenever they appear. xfstests is notorious for
- * failing tests because blkid via udev races with a losetup
- * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
- * command to fail with EBUSY.
+ * Mark the device for removal of the backing file on last close.
+ * If we are the only opener, also switch the state to rundown here to
+ * prevent new openers from coming in.
*/
- if (disk_openers(lo->lo_disk) > 1) {
- lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- loop_global_unlock(lo, true);
- return 0;
- }
- lo->lo_state = Lo_rundown;
+
+ lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+ if (disk_openers(lo->lo_disk) == 1)
+ lo->lo_state = Lo_rundown;
loop_global_unlock(lo, true);
- __loop_clr_fd(lo, false);
return 0;
}
@@ -1500,10 +1464,6 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (lo->lo_state != Lo_bound)
return -ENXIO;
- err = blk_validate_block_size(arg);
- if (err)
- return err;
-
if (lo->lo_queue->limits.logical_block_size == arg)
return 0;
@@ -1511,7 +1471,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
- err = loop_reconfigure_limits(lo, arg, false);
+ err = loop_reconfigure_limits(lo, arg);
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
@@ -1740,25 +1700,43 @@ static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
}
#endif
+static int lo_open(struct gendisk *disk, blk_mode_t mode)
+{
+ struct loop_device *lo = disk->private_data;
+ int err;
+
+ err = mutex_lock_killable(&lo->lo_mutex);
+ if (err)
+ return err;
+
+ if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown)
+ err = -ENXIO;
+ mutex_unlock(&lo->lo_mutex);
+ return err;
+}
+
static void lo_release(struct gendisk *disk)
{
struct loop_device *lo = disk->private_data;
+ bool need_clear = false;
if (disk_openers(disk) > 0)
return;
+ /*
+ * Clear the backing device information if this is the last close of
+ * a device that's been marked for auto clear, or on which LOOP_CLR_FD
+ * has been called.
+ */
mutex_lock(&lo->lo_mutex);
- if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
+ if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
- /*
- * In autoclear mode, stop the loop thread
- * and remove configuration after last close.
- */
- __loop_clr_fd(lo, true);
- return;
- }
+
+ need_clear = (lo->lo_state == Lo_rundown);
mutex_unlock(&lo->lo_mutex);
+
+ if (need_clear)
+ __loop_clr_fd(lo);
}
static void lo_free_disk(struct gendisk *disk)
@@ -1775,6 +1753,7 @@ static void lo_free_disk(struct gendisk *disk)
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
+ .open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
@@ -1853,6 +1832,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH));
+MODULE_DESCRIPTION("Loopback device support");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
@@ -2060,14 +2040,6 @@ static int loop_add(int i)
lo->lo_queue = lo->lo_disk->queue;
/*
- * By default, we do buffer IO, so it doesn't make sense to enable
- * merge because the I/O submitted to backing file is handled page by
- * page. For directio mode, merge does help to dispatch bigger request
- * to underlayer disk. We will enable merge once directio is enabled.
- */
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
-
- /*
* Disable partition scanning by default. The in-kernel partition
* scanning can be requested individually per-device during its
* setup. Userspace can always add and remove partitions from all
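The loop rework above replaces the old eager teardown in LOOP_CLR_FD with a deferred model: the ioctl only marks the device, and the backing file is actually cleared in lo_release() once the last opener goes away. A minimal sketch of this pattern, with hypothetical type and helper names (my_dev, dev_clear_backing_file) standing in for the loop internals:

	static void dev_clr_fd(struct my_dev *dev)
	{
		mutex_lock(&dev->lock);
		/* Always tear down the backing file on the last close. */
		dev->flags |= DEV_AUTOCLEAR;
		/* If we are the only opener, block new opens right away. */
		if (disk_openers(dev->disk) == 1)
			dev->state = DEV_RUNDOWN;
		mutex_unlock(&dev->lock);
	}

	static void dev_release(struct gendisk *disk)
	{
		struct my_dev *dev = disk->private_data;
		bool clear;

		if (disk_openers(disk) > 0)
			return;		/* not the last close yet */

		mutex_lock(&dev->lock);
		if (dev->state == DEV_BOUND && (dev->flags & DEV_AUTOCLEAR))
			dev->state = DEV_RUNDOWN;
		clear = (dev->state == DEV_RUNDOWN);
		mutex_unlock(&dev->lock);

		if (clear)
			dev_clear_backing_file(dev);	/* no openers can race with us */
	}

Pairing this with an ->open handler that fails with -ENXIO while the state is rundown (as lo_open() does above) is what closes the race with udev re-opening the device mid-teardown.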
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 43a187609ef7..c6ef0546ffc9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3485,8 +3485,6 @@ skip_create_disk:
goto start_service_thread;
/* Set device limits. */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
/* Set the capacity of the device in 512 byte sectors. */
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 27b2187e7a6d..b9fdeff31caf 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -150,8 +150,6 @@ static int __init n64cart_probe(struct platform_device *pdev)
set_capacity(disk, size >> SECTOR_SHIFT);
set_disk_ro(disk, 1);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b87aa80a46dd..41a90150b501 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -342,6 +342,14 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
lim.max_hw_discard_sectors = UINT_MAX;
else
lim.max_hw_discard_sectors = 0;
+ if (!(nbd->config->flags & NBD_FLAG_SEND_FLUSH)) {
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
+ } else if (nbd->config->flags & NBD_FLAG_SEND_FUA) {
+ lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
+ } else {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ lim.features &= ~BLK_FEAT_FUA;
+ }
lim.logical_block_size = blksize;
lim.physical_block_size = blksize;
error = queue_limits_commit_update(nbd->disk->queue, &lim);
@@ -1279,19 +1287,10 @@ static void nbd_bdev_reset(struct nbd_device *nbd)
static void nbd_parse_flags(struct nbd_device *nbd)
{
- struct nbd_config *config = nbd->config;
- if (config->flags & NBD_FLAG_READ_ONLY)
+ if (nbd->config->flags & NBD_FLAG_READ_ONLY)
set_disk_ro(nbd->disk, true);
else
set_disk_ro(nbd->disk, false);
- if (config->flags & NBD_FLAG_SEND_FLUSH) {
- if (config->flags & NBD_FLAG_SEND_FUA)
- blk_queue_write_cache(nbd->disk->queue, true, true);
- else
- blk_queue_write_cache(nbd->disk->queue, true, false);
- }
- else
- blk_queue_write_cache(nbd->disk->queue, false, false);
}
static void send_disconnects(struct nbd_device *nbd)
@@ -1801,7 +1800,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
struct queue_limits lim = {
.max_hw_sectors = 65536,
- .max_user_sectors = 256,
+ .io_opt = 256 << SECTOR_SHIFT,
.max_segments = USHRT_MAX,
.max_segment_size = UINT_MAX,
};
@@ -1861,11 +1860,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
goto out_err_disk;
}
- /*
- * Tell the block layer that we are not a rotational device
- */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
/*
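The nbd conversion above is a template for drivers that negotiate cache semantics with a remote end: instead of calling the removed blk_queue_write_cache(), translate the transport flags into BLK_FEAT_* bits inside a queue_limits update. A hedged sketch with a hypothetical helper name (the real driver folds this into the limits update that __nbd_set_size() already commits):

	static int nbd_apply_cache_flags(struct request_queue *q, u32 flags)
	{
		struct queue_limits lim = queue_limits_start_update(q);

		/* Start from a clean slate, then set what the server advertises. */
		lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
		if (flags & NBD_FLAG_SEND_FLUSH) {
			lim.features |= BLK_FEAT_WRITE_CACHE;
			if (flags & NBD_FLAG_SEND_FUA)
				lim.features |= BLK_FEAT_FUA;
		}
		return queue_limits_commit_update(q, &lim);
	}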
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 75f189e42f88..2f0431e42c49 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -77,7 +77,7 @@ enum {
NULL_IRQ_TIMER = 2,
};
-static bool g_virt_boundary = false;
+static bool g_virt_boundary;
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
@@ -227,7 +227,7 @@ MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (
static bool g_fua = true;
module_param_named(fua, g_fua, bool, 0444);
-MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true");
+MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
@@ -262,6 +262,10 @@ module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444
MODULE_PARM_DESC(zone_append_max_sectors,
"Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+static bool g_zone_full;
+module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
+MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -458,6 +462,7 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(zone_full, bool, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
@@ -610,6 +615,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_readonly,
&nullb_device_attr_zone_offline,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tags,
@@ -700,7 +706,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
"shared_tags,size,submit_queues,use_per_node_hctx,"
"virt_boundary,zoned,zone_capacity,zone_max_active,"
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size,zone_append_max_sectors\n");
+ "zone_size,zone_append_max_sectors,zone_full\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -781,6 +787,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
dev->zone_append_max_sectors = g_zone_append_max_sectors;
+ dev->zone_full = g_zone_full;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
dev->shared_tags = g_shared_tags;
@@ -1824,9 +1831,6 @@ static int null_validate_conf(struct nullb_device *dev)
dev->queue_mode = NULL_Q_MQ;
}
- if (blk_validate_block_size(dev->blocksize))
- return -EINVAL;
-
if (dev->use_per_node_hctx) {
if (dev->submit_queues != nr_online_nodes)
dev->submit_queues = nr_online_nodes;
@@ -1928,6 +1932,13 @@ static int null_add_dev(struct nullb_device *dev)
goto out_cleanup_tags;
}
+ if (dev->cache_size > 0) {
+ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (dev->fua)
+ lim.features |= BLK_FEAT_FUA;
+ }
+
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
if (IS_ERR(nullb->disk)) {
rv = PTR_ERR(nullb->disk);
@@ -1940,13 +1951,7 @@ static int null_add_dev(struct nullb_device *dev)
nullb_setup_bwtimer(nullb);
}
- if (dev->cache_size > 0) {
- set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
- blk_queue_write_cache(nullb->q, true, dev->fua);
- }
-
nullb->q->queuedata = nullb;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
if (rv < 0)
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 3234e6c85eed..a7bb32f73ec3 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -101,6 +101,7 @@ struct nullb_device {
bool memory_backed; /* if data is stored in memory */
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
+ bool zone_full; /* Initialize zones to be full */
bool virt_boundary; /* virtual boundary on/off for the device */
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index f118d304f310..9bc768b2ca56 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -145,7 +145,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone = &dev->zones[i];
null_init_zone_lock(dev, zone);
- zone->start = zone->wp = sector;
+ zone->start = sector;
if (zone->start + dev->zone_size_sects > dev_capacity_sects)
zone->len = dev_capacity_sects - zone->start;
else
@@ -153,12 +153,18 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone->capacity =
min_t(sector_t, zone->len, zone_capacity_sects);
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
- zone->cond = BLK_ZONE_COND_EMPTY;
+ if (dev->zone_full) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->capacity;
+ } else {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ }
sector += dev->zone_size_sects;
}
- lim->zoned = true;
+ lim->features |= BLK_FEAT_ZONED;
lim->chunk_sectors = dev->zone_size_sects;
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
@@ -171,9 +177,6 @@ int null_register_zoned_dev(struct nullb *nullb)
struct request_queue *q = nullb->q;
struct gendisk *disk = nullb->disk;
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- disk->nr_zones = bdev_nr_zones(disk->part0);
-
pr_info("%s: using %s zone append\n",
disk->disk_name,
queue_emulates_zone_append(q) ? "emulated" : "native");
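zone_full only changes the initial write pointer and condition of each sequential zone; the rest of the zone geometry is computed as before. The invariant established above can be written as a small predicate (a sketch against the driver's internal struct nullb_zone, not new driver code):

	/* Sketch: what null_init_zoned_dev() guarantees for each zone. */
	static bool zone_init_state_valid(const struct nullb_zone *zone, bool full)
	{
		if (full)
			return zone->cond == BLK_ZONE_COND_FULL &&
			       zone->wp == zone->start + zone->capacity;
		return zone->cond == BLK_ZONE_COND_EMPTY &&
		       zone->wp == zone->start;
	}

This makes the new option handy for testing zone reset and rewrite paths without first having to fill every zone.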
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 8a2ce8070010..7cece5884b9c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2622,6 +2622,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
struct queue_limits lim = {
.max_hw_sectors = PACKET_MAX_SECTORS,
.logical_block_size = CD_FRAMESIZE,
+ .features = BLK_FEAT_ROTATIONAL,
};
int idx;
int ret = -ENOMEM;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index b810ac0a5c4b..ff45ed766469 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -388,9 +388,9 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
.max_segments = -1,
.max_segment_size = dev->bounce_size,
.dma_alignment = dev->blk_size - 1,
+ .features = BLK_FEAT_WRITE_CACHE |
+ BLK_FEAT_ROTATIONAL,
};
-
- struct request_queue *queue;
struct gendisk *gendisk;
if (dev->blk_size < 512) {
@@ -447,10 +447,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
goto fail_free_tag_set;
}
- queue = gendisk->queue;
-
- blk_queue_write_cache(queue, true, false);
-
priv->gendisk = gendisk;
gendisk->major = ps3disk_major;
gendisk->first_minor = devidx * PS3DISK_MINORS;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 26ff5cd2bf0a..008e850555f4 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4949,14 +4949,12 @@ static const struct blk_mq_ops rbd_mq_ops = {
static int rbd_init_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk;
- struct request_queue *q;
unsigned int objset_bytes =
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
struct queue_limits lim = {
.max_hw_sectors = objset_bytes >> SECTOR_SHIFT,
- .max_user_sectors = objset_bytes >> SECTOR_SHIFT,
+ .io_opt = objset_bytes,
.io_min = rbd_dev->opts->alloc_size,
- .io_opt = rbd_dev->opts->alloc_size,
.max_segments = USHRT_MAX,
.max_segment_size = UINT_MAX,
};
@@ -4980,12 +4978,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT;
}
+ if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+
disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_tag_set;
}
- q = disk->queue;
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
rbd_dev->dev_id);
@@ -4997,13 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
disk->minors = RBD_MINORS_PER_MAJOR;
disk->fops = &rbd_bd_ops;
disk->private_data = rbd_dev;
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
-
- if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
-
rbd_dev->disk = disk;
return 0;
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 39887556cf95..6ea7c12e3a87 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -475,7 +475,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev)
}
}
-static struct kobj_type rnbd_dev_ktype = {
+static const struct kobj_type rnbd_dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = rnbd_dev_groups,
};
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index b7ffe03c6160..c34695d2eea7 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1352,10 +1352,6 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
if (dev->access_mode == RNBD_ACCESS_RO)
set_disk_ro(dev->gd, true);
- /*
- * Network device does not need rotational
- */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
err = add_disk(dev->gd);
if (err)
put_disk(dev->gd);
@@ -1389,18 +1385,18 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
le32_to_cpu(rsp->max_discard_sectors);
}
+ if (rsp->cache_policy & RNBD_WRITEBACK) {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (rsp->cache_policy & RNBD_FUA)
+ lim.features |= BLK_FEAT_FUA;
+ }
+
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev);
if (IS_ERR(dev->gd))
return PTR_ERR(dev->gd);
dev->queue = dev->gd->queue;
rnbd_init_mq_hw_queues(dev);
- blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
- blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
- blk_queue_write_cache(dev->queue,
- !!(rsp->cache_policy & RNBD_WRITEBACK),
- !!(rsp->cache_policy & RNBD_FUA));
-
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
}
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index cba6ba43c2c2..64780094442c 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -33,7 +33,7 @@ static void rnbd_srv_dev_release(struct kobject *kobj)
kfree(dev);
}
-static struct kobj_type dev_ktype = {
+static const struct kobj_type dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = rnbd_srv_dev_release
};
@@ -184,7 +184,7 @@ static void rnbd_srv_sess_dev_release(struct kobject *kobj)
rnbd_destroy_sess_dev(sess_dev, sess_dev->keep_id);
}
-static struct kobj_type rnbd_srv_sess_dev_ktype = {
+static const struct kobj_type rnbd_srv_sess_dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = rnbd_srv_sess_dev_release,
};
diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs
new file mode 100644
index 000000000000..b0227cf9ddd3
--- /dev/null
+++ b/drivers/block/rnull.rs
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This is a Rust implementation of the C null block driver.
+//!
+//! Supported features:
+//!
+//! - blk-mq interface
+//! - direct completion
+//! - block size 4k
+//!
+//! The driver is not configurable.
+
+use kernel::{
+ alloc::flags,
+ block::mq::{
+ self,
+ gen_disk::{self, GenDisk},
+ Operations, TagSet,
+ },
+ error::Result,
+ new_mutex, pr_info,
+ prelude::*,
+ sync::{Arc, Mutex},
+ types::ARef,
+};
+
+module! {
+ type: NullBlkModule,
+ name: "rnull_mod",
+ author: "Andreas Hindborg",
+ license: "GPL v2",
+}
+
+struct NullBlkModule {
+ _disk: Pin<Box<Mutex<GenDisk<NullBlkDevice>>>>,
+}
+
+impl kernel::Module for NullBlkModule {
+ fn init(_module: &'static ThisModule) -> Result<Self> {
+ pr_info!("Rust null_blk loaded\n");
+ let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+
+ let disk = gen_disk::GenDiskBuilder::new()
+ .capacity_sectors(4096 << 11)
+ .logical_block_size(4096)?
+ .physical_block_size(4096)?
+ .rotational(false)
+ .build(format_args!("rnullb{}", 0), tagset)?;
+
+ let disk = Box::pin_init(new_mutex!(disk, "nullb:disk"), flags::GFP_KERNEL)?;
+
+ Ok(Self { _disk: disk })
+ }
+}
+
+struct NullBlkDevice;
+
+#[vtable]
+impl Operations for NullBlkDevice {
+ #[inline(always)]
+ fn queue_rq(rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result {
+ mq::Request::end_ok(rq)
+ .map_err(|_e| kernel::error::code::EIO)
+ // We take no refcounts on the request, so we expect to be able to
+ // end the request. The request reference must be unique at this
+ // point, and so `end_ok` cannot fail.
+ .expect("Fatal error - expected to be able to end request");
+
+ Ok(())
+ }
+
+ fn commit_rqs() {}
+}
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5286cb8e0824..2d38331ee667 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -791,6 +791,7 @@ static int probe_disk(struct vdc_port *port)
.seg_boundary_mask = PAGE_SIZE - 1,
.max_segment_size = PAGE_SIZE,
.max_segments = port->ring_cookies,
+ .features = BLK_FEAT_ROTATIONAL,
};
struct request_queue *q;
struct gendisk *g;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 6731678f3a41..126f151c4f2c 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -787,6 +787,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs)
static int swim_floppy_init(struct swim_priv *swd)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
int err;
int drive;
struct swim __iomem *base = swd->base;
@@ -820,7 +823,7 @@ static int swim_floppy_init(struct swim_priv *swd)
goto exit_put_disks;
swd->unit[drive].disk =
- blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL,
+ blk_mq_alloc_disk(&swd->unit[drive].tag_set, &lim,
&swd->unit[drive]);
if (IS_ERR(swd->unit[drive].disk)) {
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index a04756ac778e..90be1017f7bf 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1189,6 +1189,9 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
static int swim3_attach(struct macio_dev *mdev,
const struct of_device_id *match)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
struct floppy_state *fs;
struct gendisk *disk;
int rc;
@@ -1210,7 +1213,7 @@ static int swim3_attach(struct macio_dev *mdev,
if (rc)
goto out_unregister;
- disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs);
+ disk = blk_mq_alloc_disk(&fs->tag_set, &lim, fs);
if (IS_ERR(disk)) {
rc = PTR_ERR(disk);
goto out_free_tag_set;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 4e159948c912..6fb799ec0d88 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -248,8 +248,6 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
-
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
}
@@ -484,16 +482,8 @@ static inline unsigned ublk_pos_to_tag(loff_t pos)
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
{
- struct request_queue *q = ub->ub_disk->queue;
const struct ublk_param_basic *p = &ub->params.basic;
- blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
- p->attrs & UBLK_ATTR_FUA);
- if (p->attrs & UBLK_ATTR_ROTATIONAL)
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-
if (p->attrs & UBLK_ATTR_READ_ONLY)
set_disk_ro(ub->ub_disk, true);
@@ -2204,12 +2194,21 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
return -EOPNOTSUPP;
- lim.zoned = true;
+ lim.features |= BLK_FEAT_ZONED;
lim.max_active_zones = p->max_active_zones;
lim.max_open_zones = p->max_open_zones;
lim.max_zone_append_sectors = p->max_zone_append_sectors;
}
+ if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (ub->params.basic.attrs & UBLK_ATTR_FUA)
+ lim.features |= BLK_FEAT_FUA;
+ }
+
+ if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL)
+ lim.features |= BLK_FEAT_ROTATIONAL;
+
if (wait_for_completion_interruptible(&ub->completion) != 0)
return -EINTR;
@@ -3017,4 +3016,5 @@ module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
MODULE_AUTHOR("Ming Lei <[email protected]>");
+MODULE_DESCRIPTION("Userspace block device");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2351f411fa46..e3147a611151 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
- lim->zoned = true;
+ lim->features |= BLK_FEAT_ZONED;
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_open_zones, &v);
@@ -1089,14 +1089,6 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
return writeback;
}
-static void virtblk_update_cache_mode(struct virtio_device *vdev)
-{
- u8 writeback = virtblk_get_cache_mode(vdev);
- struct virtio_blk *vblk = vdev->priv;
-
- blk_queue_write_cache(vblk->disk->queue, writeback, false);
-}
-
static const char *const virtblk_cache_types[] = {
"write through", "write back"
};
@@ -1108,6 +1100,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
struct gendisk *disk = dev_to_disk(dev);
struct virtio_blk *vblk = disk->private_data;
struct virtio_device *vdev = vblk->vdev;
+ struct queue_limits lim;
int i;
BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
@@ -1116,7 +1109,17 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
return i;
virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
- virtblk_update_cache_mode(vdev);
+
+ lim = queue_limits_start_update(disk->queue);
+ if (virtblk_get_cache_mode(vdev))
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ else
+ lim.features &= ~BLK_FEAT_WRITE_CACHE;
+ blk_mq_freeze_queue(disk->queue);
+ i = queue_limits_commit_update(disk->queue, &lim);
+ blk_mq_unfreeze_queue(disk->queue);
+ if (i)
+ return i;
return count;
}
@@ -1247,7 +1250,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
struct queue_limits *lim)
{
struct virtio_device *vdev = vblk->vdev;
- u32 v, blk_size, max_size, sg_elems, opt_io_size;
+ u32 v, max_size, sg_elems, opt_io_size;
u32 max_discard_segs = 0;
u32 discard_granularity = 0;
u16 min_io_size;
@@ -1286,46 +1289,36 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
lim->max_segment_size = max_size;
/* Host can optionally specify the block size of the device */
- err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
+ virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
struct virtio_blk_config, blk_size,
- &blk_size);
- if (!err) {
- err = blk_validate_block_size(blk_size);
- if (err) {
- dev_err(&vdev->dev,
- "virtio_blk: invalid block size: 0x%x\n",
- blk_size);
- return err;
- }
-
- lim->logical_block_size = blk_size;
- } else
- blk_size = lim->logical_block_size;
+ &lim->logical_block_size);
/* Use topology information if available */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, physical_block_exp,
&physical_block_exp);
if (!err && physical_block_exp)
- lim->physical_block_size = blk_size * (1 << physical_block_exp);
+ lim->physical_block_size =
+ lim->logical_block_size * (1 << physical_block_exp);
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, alignment_offset,
&alignment_offset);
if (!err && alignment_offset)
- lim->alignment_offset = blk_size * alignment_offset;
+ lim->alignment_offset =
+ lim->logical_block_size * alignment_offset;
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, min_io_size,
&min_io_size);
if (!err && min_io_size)
- lim->io_min = blk_size * min_io_size;
+ lim->io_min = lim->logical_block_size * min_io_size;
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, opt_io_size,
&opt_io_size);
if (!err && opt_io_size)
- lim->io_opt = blk_size * opt_io_size;
+ lim->io_opt = lim->logical_block_size * opt_io_size;
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
virtio_cread(vdev, struct virtio_blk_config,
@@ -1419,7 +1412,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
lim->discard_granularity =
discard_granularity << SECTOR_SHIFT;
else
- lim->discard_granularity = blk_size;
+ lim->discard_granularity = lim->logical_block_size;
}
if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
@@ -1448,7 +1441,10 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
static int virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
- struct queue_limits lim = { };
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ .logical_block_size = SECTOR_SIZE,
+ };
int err, index;
unsigned int queue_depth;
@@ -1512,6 +1508,9 @@ static int virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_free_tags;
+ if (virtblk_get_cache_mode(vdev))
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+
vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk);
if (IS_ERR(vblk->disk)) {
err = PTR_ERR(vblk->disk);
@@ -1527,9 +1526,6 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->disk->fops = &virtblk_fops;
vblk->index = index;
- /* configure queue flush support */
- virtblk_update_cache_mode(vdev);
-
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
set_disk_ro(vblk->disk, 1);
@@ -1541,8 +1537,8 @@ static int virtblk_probe(struct virtio_device *vdev)
* All steps that follow use the VQs therefore they need to be
* placed after the virtio_device_ready() call above.
*/
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (lim.features & BLK_FEAT_ZONED)) {
err = blk_revalidate_disk_zones(vblk->disk);
if (err)
goto out_cleanup_disk;
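cache_type_store() above shows the generic recipe for flipping a queue feature at runtime now that blk_queue_write_cache() is gone: start a limits update, toggle the bit, and commit with the queue frozen so no I/O observes a half-applied change. As a standalone sketch (hypothetical helper name):

	static int set_write_cache(struct gendisk *disk, bool enable)
	{
		struct queue_limits lim = queue_limits_start_update(disk->queue);
		int err;

		if (enable)
			lim.features |= BLK_FEAT_WRITE_CACHE;
		else
			lim.features &= ~BLK_FEAT_WRITE_CACHE;

		/* Freeze so in-flight requests can't see a half-applied change. */
		blk_mq_freeze_queue(disk->queue);
		err = queue_limits_commit_update(disk->queue, &lim);
		blk_mq_unfreeze_queue(disk->queue);
		return err;
	}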
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 944576d582fb..838064593f62 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1563,5 +1563,6 @@ static void __exit xen_blkif_fini(void)
module_exit(xen_blkif_fini);
+MODULE_DESCRIPTION("Virtual block device back-end driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fd7c0ff2139c..59ce113b882a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -788,6 +788,11 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
* A barrier request is a superset of FUA, so we can
* implement it the same way. (It's also a FLUSH+FUA,
* since it is guaranteed ordered WRT previous writes.)
+ *
+ * Note that we can end up here with a FUA write and the
+ * flags cleared. This happens when the flag was
+ * run-time disabled after a failing I/O, and we'll
+ * simply submit it as a normal write.
*/
if (info->feature_flush && info->feature_fua)
ring_req->operation =
@@ -795,8 +800,6 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
else if (info->feature_flush)
ring_req->operation =
BLKIF_OP_FLUSH_DISKCACHE;
- else
- ring_req->operation = 0;
}
ring_req->u.rw.nr_segments = num_grant;
if (unlikely(require_extra_req)) {
@@ -887,16 +890,6 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo)
notify_remote_via_irq(rinfo->irq);
}
-static inline bool blkif_request_flush_invalid(struct request *req,
- struct blkfront_info *info)
-{
- return (blk_rq_is_passthrough(req) ||
- ((req_op(req) == REQ_OP_FLUSH) &&
- !info->feature_flush) ||
- ((req->cmd_flags & REQ_FUA) &&
- !info->feature_fua));
-}
-
static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
@@ -908,12 +901,22 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
rinfo = get_rinfo(info, qid);
blk_mq_start_request(qd->rq);
spin_lock_irqsave(&rinfo->ring_lock, flags);
- if (RING_FULL(&rinfo->ring))
- goto out_busy;
- if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
- goto out_err;
+ /*
+ * Check if the backend actually supports flushes.
+ *
+ * While the block layer won't send us flushes if we don't claim to
+ * support them, the Xen protocol allows the backend to revoke support
+ * at any time. That is of course a really bad and dangerous idea, but
+ * it has been allowed for 10+ years. In that case we simply clear the
+ * flags, complete an empty flush directly here, and ignore the FUA
+ * flag later on.
+ */
+ if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush))
+ goto complete;
+ if (RING_FULL(&rinfo->ring))
+ goto out_busy;
if (blkif_queue_request(qd->rq, rinfo))
goto out_busy;
@@ -921,14 +924,14 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_OK;
-out_err:
- spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_STS_IOERR;
-
out_busy:
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_DEV_RESOURCE;
+complete:
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ blk_mq_end_request(qd->rq, BLK_STS_OK);
+ return BLK_STS_OK;
}
static void blkif_complete_rq(struct request *rq)
@@ -956,6 +959,12 @@ static void blkif_set_queue_limits(const struct blkfront_info *info,
lim->max_secure_erase_sectors = UINT_MAX;
}
+ if (info->feature_flush) {
+ lim->features |= BLK_FEAT_WRITE_CACHE;
+ if (info->feature_fua)
+ lim->features |= BLK_FEAT_FUA;
+ }
+
/* Hard sector size and max sectors impersonate the equiv. hardware. */
lim->logical_block_size = info->sector_size;
lim->physical_block_size = info->physical_sector_size;
@@ -984,8 +993,6 @@ static const char *flush_info(struct blkfront_info *info)
static void xlvbd_flush(struct blkfront_info *info)
{
- blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
- info->feature_fua ? true : false);
pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info),
"persistent grants:", info->feature_persistent ?
@@ -1063,8 +1070,7 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info)
{
struct queue_limits lim = {};
struct gendisk *gd;
@@ -1139,7 +1145,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = PTR_ERR(gd);
goto out_free_tag_set;
}
- blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue);
strcpy(gd->disk_name, DEV_NAME);
ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
@@ -1159,8 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->rq = gd->queue;
info->gd = gd;
- info->sector_size = sector_size;
- info->physical_sector_size = physical_sector_size;
xlvbd_flush(info);
@@ -1605,8 +1608,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- blk_queue_max_discard_sectors(rq, 0);
- blk_queue_max_secure_erase_sectors(rq, 0);
+ blk_queue_disable_discard(rq);
+ blk_queue_disable_secure_erase(rq);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
@@ -1627,7 +1630,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
- xlvbd_flush(info);
}
fallthrough;
case BLKIF_OP_READ:
@@ -2315,8 +2317,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
static void blkfront_connect(struct blkfront_info *info)
{
unsigned long long sectors;
- unsigned long sector_size;
- unsigned int physical_sector_size;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2355,7 +2355,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
"info", "%u", &info->vdisk_info,
- "sector-size", "%lu", &sector_size,
+ "sector-size", "%lu", &info->sector_size,
NULL);
if (err) {
xenbus_dev_fatal(info->xbdev, err,
@@ -2369,9 +2369,9 @@ static void blkfront_connect(struct blkfront_info *info)
* provide this. Assume physical sector size to be the same as
* sector_size in that case.
*/
- physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
+ info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
"physical-sector-size",
- sector_size);
+ info->sector_size);
blkfront_gather_backend_features(info);
for_each_rinfo(info, rinfo, i) {
err = blkfront_setup_indirect(rinfo);
@@ -2383,8 +2383,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, sector_size,
- physical_sector_size);
+ err = xlvbd_alloc_gendisk(sectors, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
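The discard error path above also switches from zeroing individual limits to the dedicated disable helpers, which keep the related limits consistent in one call. The idiom, as a hedged sketch with a hypothetical helper name:

	/* Sketch: backend returned EOPNOTSUPP for a discard/secure-erase request. */
	static void blkfront_disable_discard(struct blkfront_info *info,
					     struct request_queue *rq)
	{
		info->feature_discard = 0;
		info->feature_secdiscard = 0;
		blk_queue_disable_discard(rq);		/* zeroes the discard limits */
		blk_queue_disable_secure_erase(rq);	/* zeroes the secure-erase limit */
	}

Unlike enabling a feature, disabling one is safe to do directly on the live queue, which is why no queue_limits_commit_update() round-trip appears in this path.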
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 7c5f4e4d9b50..4b7219be1bb8 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -409,4 +409,5 @@ static void __exit z2_exit(void)
module_init(z2_init);
module_exit(z2_exit);
+MODULE_DESCRIPTION("Amiga Zorro II ramdisk driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 3acd7006ad2c..efcb8d9d274c 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2208,6 +2208,8 @@ static int zram_add(void)
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
.max_write_zeroes_sectors = UINT_MAX,
#endif
+ .features = BLK_FEAT_STABLE_WRITES |
+ BLK_FEAT_SYNCHRONOUS,
};
struct zram *zram;
int ret, device_id;
@@ -2245,10 +2247,6 @@ static int zram_add(void)
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */
set_capacity(zram->disk, 0);
- /* zram devices sort of resembles non-rotational disks */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index 5b6805d87fcf..dd3c0626c72d 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -382,7 +382,7 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data,
/* The first 4 bytes indicates the Intel PCIe specific packet type */
pdata = skb_pull_data(skb, BTINTEL_PCIE_HCI_TYPE_LEN);
- if (!data) {
+ if (!pdata) {
bt_dev_err(hdev, "Corrupted packet received");
ret = -EILSEQ;
goto exit_error;
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 9d0c7e278114..9bfa9a6ad56c 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -281,7 +281,7 @@ static u8 crc8_table[CRC8_TABLE_SIZE];
/* Default configurations */
#define DEFAULT_H2C_WAKEUP_MODE WAKEUP_METHOD_BREAK
-#define DEFAULT_PS_MODE PS_MODE_DISABLE
+#define DEFAULT_PS_MODE PS_MODE_ENABLE
#define FW_INIT_BAUDRATE HCI_NXP_PRI_BAUDRATE
static struct sk_buff *nxp_drv_send_cmd(struct hci_dev *hdev, u16 opcode,
diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c
index 0c2f15235b4c..d90858ea2fe5 100644
--- a/drivers/bluetooth/hci_bcm4377.c
+++ b/drivers/bluetooth/hci_bcm4377.c
@@ -495,6 +495,10 @@ struct bcm4377_data;
* extended scanning
* broken_mws_transport_config: Set to true if the chip erroneously claims to
* support MWS Transport Configuration
+ * broken_le_ext_adv_report_phy: Set to true if this chip stuffs flags into
+ * the reserved bits of Primary/Secondary_PHY in
+ * LE Extended Advertising Report events, which
+ * have to be ignored
* send_calibration: Optional callback to send calibration data
* send_ptb: Callback to send "PTB" regulatory/calibration data
*/
@@ -513,6 +517,7 @@ struct bcm4377_hw {
unsigned long broken_ext_scan : 1;
unsigned long broken_mws_transport_config : 1;
unsigned long broken_le_coded : 1;
+ unsigned long broken_le_ext_adv_report_phy : 1;
int (*send_calibration)(struct bcm4377_data *bcm4377);
int (*send_ptb)(struct bcm4377_data *bcm4377,
@@ -716,7 +721,7 @@ static void bcm4377_handle_ack(struct bcm4377_data *bcm4377,
ring->events[msgid] = NULL;
}
- bitmap_release_region(ring->msgids, msgid, ring->n_entries);
+ bitmap_release_region(ring->msgids, msgid, 0);
unlock:
spin_unlock_irqrestore(&ring->lock, flags);
@@ -2373,6 +2378,8 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks);
if (bcm4377->hw->broken_le_coded)
set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks);
+ if (bcm4377->hw->broken_le_ext_adv_report_phy)
+ set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks);
pci_set_drvdata(pdev, bcm4377);
hci_set_drvdata(hdev, bcm4377);
@@ -2477,6 +2484,7 @@ static const struct bcm4377_hw bcm4377_hw_variants[] = {
.clear_pciecfg_subsystem_ctrl_bit19 = true,
.broken_mws_transport_config = true,
.broken_le_coded = true,
+ .broken_le_ext_adv_report_phy = true,
.send_calibration = bcm4387_send_calibration,
.send_ptb = bcm4378_send_ptb,
},
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 0c9c9ee56592..9a0bc86f9aac 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2450,15 +2450,27 @@ static void qca_serdev_shutdown(struct device *dev)
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
struct hci_uart *hu = &qcadev->serdev_hu;
struct hci_dev *hdev = hu->hdev;
- struct qca_data *qca = hu->priv;
const u8 ibs_wake_cmd[] = { 0xFD };
const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
if (qcadev->btsoc_type == QCA_QCA6390) {
- if (test_bit(QCA_BT_OFF, &qca->flags) ||
- !test_bit(HCI_RUNNING, &hdev->flags))
+ /* The purpose of sending the VSC is to reset the SoC into an initial
+ * state that ensures the next hdev->setup() will succeed.
+ * If HCI_QUIRK_NON_PERSISTENT_SETUP is set, hdev->setup() can do its
+ * job regardless of the SoC state, so there is no need to send the
+ * VSC.
+ * If HCI_SETUP is set, hdev->setup() was never invoked and the SoC
+ * is already in its initial state, so the VSC is not needed either.
+ */
+ if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) ||
+ hci_dev_test_flag(hdev, HCI_SETUP))
return;
+ /* The serdev must be in the open state when control flow arrives
+ * here; this also avoids a use-after-free caused by flushing or
+ * writing to the serdev after it has been closed.
+ */
serdev_device_write_flush(serdev);
ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
sizeof(ibs_wake_cmd));
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 20c90ebb3a3f..49e4829b7264 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3708,4 +3708,5 @@ static void __exit cdrom_exit(void)
module_init(cdrom_init);
module_exit(cdrom_exit);
+MODULE_DESCRIPTION("Uniform CD-ROM driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index eefdd422ad8e..71cfe7a85913 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -744,6 +744,7 @@ static int probe_gdrom(struct platform_device *devptr)
.max_segments = 1,
/* set a large max size to get most from DMA */
.max_segment_size = 0x40000,
+ .features = BLK_FEAT_ROTATIONAL,
};
int err;
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index d51fc8321d41..da32e8ed0830 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -269,8 +269,13 @@ hpet_read(struct file *file, char __user *buf, size_t count, loff_t * ppos)
if (!devp->hd_ireqfreq)
return -EIO;
- if (count < sizeof(unsigned long))
- return -EINVAL;
+ if (in_compat_syscall()) {
+ if (count < sizeof(compat_ulong_t))
+ return -EINVAL;
+ } else {
+ if (count < sizeof(unsigned long))
+ return -EINVAL;
+ }
add_wait_queue(&devp->hd_waitqueue, &wait);
@@ -294,9 +299,16 @@ hpet_read(struct file *file, char __user *buf, size_t count, loff_t * ppos)
schedule();
}
- retval = put_user(data, (unsigned long __user *)buf);
- if (!retval)
- retval = sizeof(unsigned long);
+ if (in_compat_syscall()) {
+ retval = put_user(data, (compat_ulong_t __user *)buf);
+ if (!retval)
+ retval = sizeof(compat_ulong_t);
+ } else {
+ retval = put_user(data, (unsigned long __user *)buf);
+ if (!retval)
+ retval = sizeof(unsigned long);
+ }
+
out:
__set_current_state(TASK_RUNNING);
remove_wait_queue(&devp->hd_waitqueue, &wait);
@@ -651,12 +663,24 @@ struct compat_hpet_info {
unsigned short hi_timer;
};
+/* The 32-bit types lead to different command codes, which must be
+ * translated into the 64-bit ones before being passed to
+ * hpet_ioctl_common().
+ */
+#define COMPAT_HPET_INFO _IOR('h', 0x03, struct compat_hpet_info)
+#define COMPAT_HPET_IRQFREQ _IOW('h', 0x6, compat_ulong_t)
+
static long
hpet_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct hpet_info info;
int err;
+ if (cmd == COMPAT_HPET_INFO)
+ cmd = HPET_INFO;
+
+ if (cmd == COMPAT_HPET_IRQFREQ)
+ cmd = HPET_IRQFREQ;
+
mutex_lock(&hpet_mutex);
err = hpet_ioctl_common(file->private_data, cmd, arg, &info);
mutex_unlock(&hpet_mutex);
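The two COMPAT_* definitions above exist because an ioctl command number encodes the size of its argument, so the 32-bit layouts of struct hpet_info and unsigned long produce command codes that differ from the native 64-bit ones. A hedged illustration (sizes assume a typical LP64 kernel):

	/*
	 * cmd = dir << 30 | sizeof(arg) << 16 | type << 8 | nr
	 *
	 * sizeof(struct hpet_info)        == 24  (two 8-byte longs + padding)
	 * sizeof(struct compat_hpet_info) == 12  (two 4-byte compat_ulong_t)
	 */
	static_assert(sizeof(struct compat_hpet_info) != sizeof(struct hpet_info),
		      "the differing sizes are what make the compat codes distinct");

Hence a 32-bit process issuing HPET_INFO arrives in the kernel with COMPAT_HPET_INFO, and hpet_compat_ioctl() must rewrite the command code before calling hpet_ioctl_common().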
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 4c695b0388f3..9bb142c75243 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -16,8 +16,8 @@ tpm-y += eventlog/common.o
tpm-y += eventlog/tpm1.o
tpm-y += eventlog/tpm2.o
tpm-y += tpm-buf.o
+tpm-y += tpm2-sessions.o
-tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o
tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o
tpm-$(CONFIG_EFI) += eventlog/efi.o
tpm-$(CONFIG_OF) += eventlog/of.o
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 907ac9956a78..2281d55df545 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -83,9 +83,6 @@
#define AES_KEY_BYTES AES_KEYSIZE_128
#define AES_KEY_BITS (AES_KEY_BYTES*8)
-static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
- u32 *handle, u8 *name);
-
/*
* This is the structure that carries all the auth information (like
* session handle, nonces, session key and auth) from use to use it is
@@ -148,6 +145,7 @@ struct tpm2_auth {
u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE];
};
+#ifdef CONFIG_TCG_TPM2_HMAC
/*
* Name Size based on TPM algorithm (assumes no hash bigger than 255)
*/
@@ -163,6 +161,226 @@ static u8 name_size(const u8 *name)
return size_map[alg] + 2;
}
+static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ off_t offset = TPM_HEADER_SIZE;
+ u32 tot_len = be32_to_cpu(head->length);
+ u32 val;
+
+ /* we're starting after the header so adjust the length */
+ tot_len -= TPM_HEADER_SIZE;
+
+ /* skip public */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val > tot_len)
+ return -EINVAL;
+ offset += val;
+ /* name */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val != name_size(&buf->data[offset]))
+ return -EINVAL;
+ memcpy(name, &buf->data[offset], val);
+ /* forget the rest */
+ return 0;
+}
+
+static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
+{
+ struct tpm_buf buf;
+ int rc;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, handle);
+ rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
+ if (rc == TPM2_RC_SUCCESS)
+ rc = tpm2_parse_read_public(name, &buf);
+
+ tpm_buf_destroy(&buf);
+
+ return rc;
+}
+#endif /* CONFIG_TCG_TPM2_HMAC */
+
+/**
+ * tpm_buf_append_name() - add a handle area to the buffer
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @handle: The handle to be appended
+ * @name: The name of the handle (may be NULL)
+ *
+ * In order to compute session HMACs, we need to know the names of the
+ * objects pointed to by the handles. For most objects, this is simply
+ * the actual 4 byte handle or an empty buf (in these cases @name
+ * should be NULL) but for volatile objects, permanent objects and NV
+ * areas, the name is defined as the hash (according to the name
+ * algorithm which should be set to sha256) of the public area to
+ * which the two byte algorithm id has been appended. For these
+ * objects, the @name pointer should point to this. If a name is
+ * required but @name is NULL, then TPM2_ReadPublic() will be called
+ * on the handle to obtain the name.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+ u32 handle, u8 *name)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth;
+ int slot;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ tpm_buf_append_u32(buf, handle);
+ /* count the number of handles in the upper bits of flags */
+ buf->handles++;
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4;
+ if (slot >= AUTH_MAX_NAMES) {
+ dev_err(&chip->dev, "TPM: too many handles\n");
+ return;
+ }
+ auth = chip->auth;
+ WARN(auth->session != tpm_buf_length(buf),
+ "name added in wrong place\n");
+ tpm_buf_append_u32(buf, handle);
+ auth->session += 4;
+
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+ if (!name)
+ tpm2_read_public(chip, handle, auth->name[slot]);
+ } else {
+ if (name)
+ dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
+ }
+
+ auth->name_h[slot] = handle;
+ if (name)
+ memcpy(auth->name[slot], name, name_size(name));
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+
+/**
+ * tpm_buf_append_hmac_session() - Append a TPM session element
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @attributes: The session attributes
+ * @passphrase: The session authority (NULL if none)
+ * @passphrase_len: The length of the session authority (0 if none)
+ *
+ * This fills in a session structure in the TPM command buffer, except
+ * for the HMAC which cannot be computed until the command buffer is
+ * complete. The type of session is controlled by the @attributes,
+ * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
+ * session won't terminate after tpm_buf_check_hmac_response(),
+ * TPM2_SA_DECRYPT, which means this buffer's first parameter should be
+ * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
+ * response buffer's first parameter needs to be decrypted (confusing,
+ * but the defines are written from the point of view of the TPM).
+ *
+ * Any session appended by this command must be finalized by calling
+ * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
+ * and the TPM will reject the command.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase,
+ int passphrase_len)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ u8 nonce[SHA256_DIGEST_SIZE];
+ struct tpm2_auth *auth;
+ u32 len;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ /* offset tells us where the sessions area begins */
+ int offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ u32 len = 9 + passphrase_len;
+
+ if (tpm_buf_length(buf) != offset) {
+ /* not the first session so update the existing length */
+ len += get_unaligned_be32(&buf->data[offset]);
+ put_unaligned_be32(len, &buf->data[offset]);
+ } else {
+ tpm_buf_append_u32(buf, len);
+ }
+ /* auth handle */
+ tpm_buf_append_u32(buf, TPM2_RS_PW);
+ /* nonce */
+ tpm_buf_append_u16(buf, 0);
+ /* attributes */
+ tpm_buf_append_u8(buf, 0);
+ /* passphrase */
+ tpm_buf_append_u16(buf, passphrase_len);
+ tpm_buf_append(buf, passphrase, passphrase_len);
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ /*
+ * The Architecture Guide requires us to strip trailing zeros
+ * before computing the HMAC
+ */
+ while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0')
+ passphrase_len--;
+
+ auth = chip->auth;
+ auth->attrs = attributes;
+ auth->passphrase_len = passphrase_len;
+ if (passphrase_len)
+ memcpy(auth->passphrase, passphrase, passphrase_len);
+
+ if (auth->session != tpm_buf_length(buf)) {
+ /* we're not the first session */
+ len = get_unaligned_be32(&buf->data[auth->session]);
+ if (4 + len + auth->session != tpm_buf_length(buf)) {
+ WARN(1, "session length mismatch, cannot append");
+ return;
+ }
+
+ /* add our new session */
+ len += 9 + 2 * SHA256_DIGEST_SIZE;
+ put_unaligned_be32(len, &buf->data[auth->session]);
+ } else {
+ tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
+ }
+
+ /* random number for our nonce */
+ get_random_bytes(nonce, sizeof(nonce));
+ memcpy(auth->our_nonce, nonce, sizeof(nonce));
+ tpm_buf_append_u32(buf, auth->handle);
+ /* our new nonce */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+ tpm_buf_append_u8(buf, auth->attrs);
+ /* and put a placeholder for the hmac */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
+ u32 *handle, u8 *name);
+
/*
* It turns out the crypto hmac(sha256) is hard for us to consume
* because it assumes a fixed key and the TPM seems to change the key
@@ -344,82 +562,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
}
/**
- * tpm_buf_append_hmac_session() - Append a TPM session element
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @attributes: The session attributes
- * @passphrase: The session authority (NULL if none)
- * @passphrase_len: The length of the session authority (0 if none)
- *
- * This fills in a session structure in the TPM command buffer, except
- * for the HMAC which cannot be computed until the command buffer is
- * complete. The type of session is controlled by the @attributes,
- * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
- * session won't terminate after tpm_buf_check_hmac_response(),
- * TPM2_SA_DECRYPT which means this buffers first parameter should be
- * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
- * response buffer's first parameter needs to be decrypted (confusing,
- * but the defines are written from the point of view of the TPM).
- *
- * Any session appended by this command must be finalized by calling
- * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
- * and the TPM will reject the command.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphrase_len)
-{
- u8 nonce[SHA256_DIGEST_SIZE];
- u32 len;
- struct tpm2_auth *auth = chip->auth;
-
- /*
- * The Architecture Guide requires us to strip trailing zeros
- * before computing the HMAC
- */
- while (passphrase && passphrase_len > 0
- && passphrase[passphrase_len - 1] == '\0')
- passphrase_len--;
-
- auth->attrs = attributes;
- auth->passphrase_len = passphrase_len;
- if (passphrase_len)
- memcpy(auth->passphrase, passphrase, passphrase_len);
-
- if (auth->session != tpm_buf_length(buf)) {
- /* we're not the first session */
- len = get_unaligned_be32(&buf->data[auth->session]);
- if (4 + len + auth->session != tpm_buf_length(buf)) {
- WARN(1, "session length mismatch, cannot append");
- return;
- }
-
- /* add our new session */
- len += 9 + 2 * SHA256_DIGEST_SIZE;
- put_unaligned_be32(len, &buf->data[auth->session]);
- } else {
- tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
- }
-
- /* random number for our nonce */
- get_random_bytes(nonce, sizeof(nonce));
- memcpy(auth->our_nonce, nonce, sizeof(nonce));
- tpm_buf_append_u32(buf, auth->handle);
- /* our new nonce */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
- tpm_buf_append_u8(buf, auth->attrs);
- /* and put a placeholder for the hmac */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
-}
-EXPORT_SYMBOL(tpm_buf_append_hmac_session);
-
-/**
* tpm_buf_fill_hmac_session() - finalize the session HMAC
* @chip: the TPM chip structure
* @buf: The buffer to be appended
@@ -449,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
u8 cphash[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
+ if (!auth)
+ return;
+
/* save the command code in BE format */
auth->ordinal = head->ordinal;
@@ -567,104 +712,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
}
EXPORT_SYMBOL(tpm_buf_fill_hmac_session);
-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
- off_t offset = TPM_HEADER_SIZE;
- u32 tot_len = be32_to_cpu(head->length);
- u32 val;
-
- /* we're starting after the header so adjust the length */
- tot_len -= TPM_HEADER_SIZE;
-
- /* skip public */
- val = tpm_buf_read_u16(buf, &offset);
- if (val > tot_len)
- return -EINVAL;
- offset += val;
- /* name */
- val = tpm_buf_read_u16(buf, &offset);
- if (val != name_size(&buf->data[offset]))
- return -EINVAL;
- memcpy(name, &buf->data[offset], val);
- /* forget the rest */
- return 0;
-}
-
-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
-{
- struct tpm_buf buf;
- int rc;
-
- rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, handle);
- rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
- if (rc == TPM2_RC_SUCCESS)
- rc = tpm2_parse_read_public(name, &buf);
-
- tpm_buf_destroy(&buf);
-
- return rc;
-}
-
-/**
- * tpm_buf_append_name() - add a handle area to the buffer
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @handle: The handle to be appended
- * @name: The name of the handle (may be NULL)
- *
- * In order to compute session HMACs, we need to know the names of the
- * objects pointed to by the handles. For most objects, this is simply
- * the actual 4 byte handle or an empty buf (in these cases @name
- * should be NULL) but for volatile objects, permanent objects and NV
- * areas, the name is defined as the hash (according to the name
- * algorithm which should be set to sha256) of the public area to
- * which the two byte algorithm id has been appended. For these
- * objects, the @name pointer should point to this. If a name is
- * required but @name is NULL, then TPM2_ReadPublic() will be called
- * on the handle to obtain the name.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- enum tpm2_mso_type mso = tpm2_handle_mso(handle);
- struct tpm2_auth *auth = chip->auth;
- int slot;
-
- slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4;
- if (slot >= AUTH_MAX_NAMES) {
- dev_err(&chip->dev, "TPM: too many handles\n");
- return;
- }
- WARN(auth->session != tpm_buf_length(buf),
- "name added in wrong place\n");
- tpm_buf_append_u32(buf, handle);
- auth->session += 4;
-
- if (mso == TPM2_MSO_PERSISTENT ||
- mso == TPM2_MSO_VOLATILE ||
- mso == TPM2_MSO_NVRAM) {
- if (!name)
- tpm2_read_public(chip, handle, auth->name[slot]);
- } else {
- if (name)
- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
- }
-
- auth->name_h[slot] = handle;
- if (name)
- memcpy(auth->name[slot], name, name_size(name));
-}
-EXPORT_SYMBOL(tpm_buf_append_name);
-
/**
* tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness
* @chip: the TPM chip structure
@@ -705,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
u32 cc = be32_to_cpu(auth->ordinal);
int parm_len, len, i, handles;
+ if (!auth)
+ return rc;
+
if (auth->session >= TPM_HEADER_SIZE) {
WARN(1, "tpm session not filled correctly\n");
goto out;
@@ -824,8 +874,13 @@ EXPORT_SYMBOL(tpm_buf_check_hmac_response);
*/
void tpm2_end_auth_session(struct tpm_chip *chip)
{
- tpm2_flush_context(chip, chip->auth->handle);
- memzero_explicit(chip->auth, sizeof(*chip->auth));
+ struct tpm2_auth *auth = chip->auth;
+
+ if (!auth)
+ return;
+
+ tpm2_flush_context(chip, auth->handle);
+ memzero_explicit(auth, sizeof(*auth));
}
EXPORT_SYMBOL(tpm2_end_auth_session);
@@ -907,6 +962,11 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
int rc;
u32 null_key;
+ if (!auth) {
+ dev_warn_once(&chip->dev, "auth session is not active\n");
+ return 0;
+ }
+
rc = tpm2_load_null(chip, &null_key);
if (rc)
goto out;
@@ -1301,3 +1361,4 @@ int tpm2_sessions_init(struct tpm_chip *chip)
return rc;
}
+#endif /* CONFIG_TCG_TPM2_HMAC */
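
The guards added above all follow one pattern: when CONFIG_TCG_TPM2_HMAC is disabled (or session setup failed), chip->auth stays NULL and every session entry point degrades to a safe no-op. A minimal sketch of that shape — the struct and function names here are illustrative, not the kernel's:

#include <linux/string.h>
#include <linux/types.h>

struct example_auth {
	u32 handle;
	u8 secret[32];
};

static void example_end_auth_session(struct example_auth **authp)
{
	struct example_auth *auth = *authp;

	if (!auth)		/* HMAC sessions disabled: nothing to do */
		return;

	/* ...flush auth->handle on the TPM here... */
	memzero_explicit(auth, sizeof(*auth));	/* scrub session secrets */
}
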
diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
index ba504e19d420..62d876e150e1 100644
--- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
+++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
@@ -29,6 +29,7 @@ static const struct mtk_gate mfg_clks[] = {
static const struct mtk_clk_desc mfg_desc = {
.clks = mfg_clks,
.num_clks = ARRAY_SIZE(mfg_clks),
+ .need_runtime_pm = true,
};
static const struct of_device_id of_match_clk_mt8183_mfg[] = {
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index bd37ab4d1a9b..ba1d1c495bc2 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -496,14 +496,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
}
- devm_pm_runtime_enable(&pdev->dev);
- /*
- * Do a pm_runtime_resume_and_get() to workaround a possible
- * deadlock between clk_register() and the genpd framework.
- */
- r = pm_runtime_resume_and_get(&pdev->dev);
- if (r)
- return r;
+ if (mcd->need_runtime_pm) {
+ devm_pm_runtime_enable(&pdev->dev);
+ /*
+ * Do a pm_runtime_resume_and_get() to work around a possible
+ * deadlock between clk_register() and the genpd framework.
+ */
+ r = pm_runtime_resume_and_get(&pdev->dev);
+ if (r)
+ return r;
+ }
/* Calculate how many clk_hw_onecell_data entries to allocate */
num_clks = mcd->num_clks + mcd->num_composite_clks;
@@ -585,7 +587,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
goto unregister_clks;
}
- pm_runtime_put(&pdev->dev);
+ if (mcd->need_runtime_pm)
+ pm_runtime_put(&pdev->dev);
return r;
@@ -618,7 +621,8 @@ free_base:
if (mcd->shared_io && base)
iounmap(base);
- pm_runtime_put(&pdev->dev);
+ if (mcd->need_runtime_pm)
+ pm_runtime_put(&pdev->dev);
return r;
}
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index 22096501a60a..c17fe1c2d732 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -237,6 +237,8 @@ struct mtk_clk_desc {
int (*clk_notifier_func)(struct device *dev, struct clk *clk);
unsigned int mfg_clk_idx;
+
+ bool need_runtime_pm;
};
int mtk_clk_pdev_probe(struct platform_device *pdev);
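
The new need_runtime_pm field makes the runtime-PM handling opt-in per clock descriptor rather than unconditional. A sketch of the resulting probe shape, with hypothetical names standing in for the MediaTek ones:

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

struct example_desc {
	bool need_runtime_pm;
};

static int example_probe(struct platform_device *pdev,
			 const struct example_desc *d)
{
	int r;

	if (d->need_runtime_pm) {
		r = devm_pm_runtime_enable(&pdev->dev);
		if (r)
			return r;
		r = pm_runtime_resume_and_get(&pdev->dev);
		if (r)
			return r;
	}

	/* ...register clocks here... */

	if (d->need_runtime_pm)
		pm_runtime_put(&pdev->dev);
	return 0;
}
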
diff --git a/drivers/clk/qcom/apss-ipq-pll.c b/drivers/clk/qcom/apss-ipq-pll.c
index 5f7f537e4ecb..e8632db2c542 100644
--- a/drivers/clk/qcom/apss-ipq-pll.c
+++ b/drivers/clk/qcom/apss-ipq-pll.c
@@ -70,7 +70,6 @@ static struct clk_alpha_pll ipq_pll_stromer_plus = {
static const struct alpha_pll_config ipq5018_pll_config = {
.l = 0x2a,
.config_ctl_val = 0x4001075b,
- .config_ctl_hi_val = 0x304,
.main_output_mask = BIT(0),
.aux_output_mask = BIT(1),
.early_output_mask = BIT(3),
@@ -84,7 +83,6 @@ static const struct alpha_pll_config ipq5018_pll_config = {
static const struct alpha_pll_config ipq5332_pll_config = {
.l = 0x2d,
.config_ctl_val = 0x4001075b,
- .config_ctl_hi_val = 0x304,
.main_output_mask = BIT(0),
.aux_output_mask = BIT(1),
.early_output_mask = BIT(3),
diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index d4227909d1fe..c51647e37df8 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -2574,6 +2574,9 @@ static int clk_alpha_pll_stromer_plus_set_rate(struct clk_hw *hw,
regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll),
a >> ALPHA_BITWIDTH);
+ regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll),
+ PLL_ALPHA_EN, PLL_ALPHA_EN);
+
regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL);
/* Wait five micro seconds or more */
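
The added regmap_update_bits() call is a masked read-modify-write: only the bits set in the mask argument are touched, which is why it can assert PLL_ALPHA_EN without rewriting the rest of the USER_CTL register. Reduced to its essentials — the register offset and bit position below are made up for illustration:

#include <linux/bits.h>
#include <linux/regmap.h>

/* Set a single enable bit (assumed at bit 24 of a register at 0x0c)
 * while leaving every other bit of the register untouched. */
static int example_enable_alpha(struct regmap *map)
{
	return regmap_update_bits(map, 0x0c, BIT(24), BIT(24));
}
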
diff --git a/drivers/clk/qcom/gcc-ipq9574.c b/drivers/clk/qcom/gcc-ipq9574.c
index 0a3f846695b8..f8b9a1e93bef 100644
--- a/drivers/clk/qcom/gcc-ipq9574.c
+++ b/drivers/clk/qcom/gcc-ipq9574.c
@@ -2140,9 +2140,10 @@ static struct clk_rcg2 pcnoc_bfdcd_clk_src = {
static struct clk_branch gcc_crypto_axi_clk = {
.halt_reg = 0x16010,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
- .enable_reg = 0x16010,
- .enable_mask = BIT(0),
+ .enable_reg = 0xb004,
+ .enable_mask = BIT(15),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_crypto_axi_clk",
.parent_hws = (const struct clk_hw *[]) {
@@ -2156,9 +2157,10 @@ static struct clk_branch gcc_crypto_axi_clk = {
static struct clk_branch gcc_crypto_ahb_clk = {
.halt_reg = 0x16014,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
- .enable_reg = 0x16014,
- .enable_mask = BIT(0),
+ .enable_reg = 0xb004,
+ .enable_mask = BIT(16),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_crypto_ahb_clk",
.parent_hws = (const struct clk_hw *[]) {
diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c
index cf4a7b6e0b23..0559a33faf00 100644
--- a/drivers/clk/qcom/gcc-sm6350.c
+++ b/drivers/clk/qcom/gcc-sm6350.c
@@ -100,8 +100,8 @@ static struct clk_alpha_pll gpll6 = {
.enable_mask = BIT(6),
.hw.init = &(struct clk_init_data){
.name = "gpll6",
- .parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ .parent_data = &(const struct clk_parent_data){
+ .fw_name = "bi_tcxo",
},
.num_parents = 1,
.ops = &clk_alpha_pll_fixed_fabia_ops,
@@ -124,7 +124,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_even = {
.clkr.hw.init = &(struct clk_init_data){
.name = "gpll6_out_even",
.parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ &gpll6.clkr.hw,
},
.num_parents = 1,
.ops = &clk_alpha_pll_postdiv_fabia_ops,
@@ -139,8 +139,8 @@ static struct clk_alpha_pll gpll7 = {
.enable_mask = BIT(7),
.hw.init = &(struct clk_init_data){
.name = "gpll7",
- .parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ .parent_data = &(const struct clk_parent_data){
+ .fw_name = "bi_tcxo",
},
.num_parents = 1,
.ops = &clk_alpha_pll_fixed_fabia_ops,
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index ac0091b4ce24..be375ce0149c 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -132,7 +132,6 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
for (i = 0; i < desc->hw_clks->num ; i++) {
struct clk_hw *hw = desc->hw_clks->hws[i];
- struct ccu_common *common = hw_to_ccu_common(hw);
const char *name;
if (!hw)
@@ -147,14 +146,21 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
pr_err("Couldn't register clock %d - %s\n", i, name);
goto err_clk_unreg;
}
+ }
+
+ for (i = 0; i < desc->num_ccu_clks; i++) {
+ struct ccu_common *cclk = desc->ccu_clks[i];
+
+ if (!cclk)
+ continue;
- if (common->max_rate)
- clk_hw_set_rate_range(hw, common->min_rate,
- common->max_rate);
+ if (cclk->max_rate)
+ clk_hw_set_rate_range(&cclk->hw, cclk->min_rate,
+ cclk->max_rate);
else
- WARN(common->min_rate,
+ WARN(cclk->min_rate,
"No max_rate, ignoring min_rate of clock %d - %s\n",
- i, name);
+ i, clk_hw_get_name(&cclk->hw));
}
ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get,
diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c
index 072b11fd6b32..825ae22c3ebc 100644
--- a/drivers/counter/ti-eqep.c
+++ b/drivers/counter/ti-eqep.c
@@ -6,6 +6,7 @@
*/
#include <linux/bitops.h>
+#include <linux/clk.h>
#include <linux/counter.h>
#include <linux/kernel.h>
#include <linux/mod_devicetable.h>
@@ -376,6 +377,7 @@ static int ti_eqep_probe(struct platform_device *pdev)
struct counter_device *counter;
struct ti_eqep_cnt *priv;
void __iomem *base;
+ struct clk *clk;
int err;
counter = devm_counter_alloc(dev, sizeof(*priv));
@@ -415,6 +417,10 @@ static int ti_eqep_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
+ clk = devm_clk_get_enabled(dev, NULL);
+ if (IS_ERR(clk))
+ return dev_err_probe(dev, PTR_ERR(clk), "failed to enable clock\n");
+
err = counter_add(counter);
if (err < 0) {
pm_runtime_put_sync(dev);
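
The hunk above relies on devm_clk_get_enabled(), which combines clk_get(), clk_prepare_enable() and a devres-managed disable/put on driver detach, so the probe needs no explicit clock teardown path. A minimal probe-side sketch (the error string is illustrative):

#include <linux/clk.h>
#include <linux/device.h>

static int example_enable_clock(struct device *dev)
{
	struct clk *clk;

	/* NULL con_id: the device's sole (or default) clock */
	clk = devm_clk_get_enabled(dev, NULL);
	if (IS_ERR(clk))
		return dev_err_probe(dev, PTR_ERR(clk),
				     "failed to enable clock\n");
	return 0;
}
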
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 37f1cdf46d29..4ac3a35dcd98 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -890,8 +890,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
pr_warn(FW_WARN "P-state 0 is not max freq\n");
- if (acpi_cpufreq_driver.set_boost)
+ if (acpi_cpufreq_driver.set_boost) {
set_boost(policy, acpi_cpufreq_driver.boost_enabled);
+ policy->boost_enabled = acpi_cpufreq_driver.boost_enabled;
+ }
return result;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a45aac17c20f..9e5060b27864 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1431,7 +1431,8 @@ static int cpufreq_online(unsigned int cpu)
}
/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
- policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy);
+ if (cpufreq_boost_enabled() && policy_has_boost_freq(policy))
+ policy->boost_enabled = true;
/*
* The initialization has succeeded and the policy is online.
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15de5e3d96fd..c31914a9876f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -355,15 +355,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
int ret;
ret = cppc_get_perf_caps(cpu, &cppc_perf);
- if (ret)
- return;
-
/*
- * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
- * In this case we can't use CPPC.highest_perf to enable ITMT.
- * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
+ * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0].
+ *
+ * Also, on some systems with overclocking enabled, CPPC.highest_perf is
+ * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT.
+ * Fall back to MSR_HWP_CAPABILITIES then too.
*/
- if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+ if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF)
cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
/*
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 6f9266edc9f1..eac73cbfdd38 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -39,7 +39,8 @@ intel_qat-objs := adf_cfg.o \
adf_sysfs_rl.o \
qat_uclo.o \
qat_hal.o \
- qat_bl.o
+ qat_bl.o \
+ qat_mig_dev.o
intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
adf_fw_counters.o \
@@ -56,6 +57,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \
adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \
- adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o
+ adf_gen2_pfvf.o adf_gen4_pfvf.o
intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 784843fa2a22..3df10517a327 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -52,6 +52,14 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
struct cxl_dport *dport = NULL;
int single_port_map[1];
unsigned long index;
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+
+ /*
+ * Capability checks are moot for passthrough decoders; support
+ * any and all possibilities.
+ */
+ cxlhdm->interleave_mask = ~0U;
+ cxlhdm->iw_cap_mask = ~0UL;
cxlsd = cxl_switch_decoder_alloc(port, 1);
if (IS_ERR(cxlsd))
@@ -79,6 +87,11 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
cxlhdm->interleave_mask |= GENMASK(11, 8);
if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
cxlhdm->interleave_mask |= GENMASK(14, 12);
+ cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
+ if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
+ cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
+ if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
+ cxlhdm->iw_cap_mask |= BIT(16);
}
static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index e69625a8d6a1..c00f3a933164 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -62,10 +62,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
+/**
+ * cxl_find_nvdimm_bridge() - find a bridge device relative to a port
+ * @port: any descendant port of an nvdimm-bridge associated
+ * root-cxl-port
+ */
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port)
{
- struct cxl_root *cxl_root __free(put_cxl_root) =
- find_cxl_root(cxlmd->endpoint);
+ struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
struct device *dev;
if (!cxl_root)
@@ -242,18 +246,20 @@ static void cxlmd_release_nvdimm(void *_cxlmd)
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
+ * @parent_port: parent port for the (to be added) @cxlmd endpoint port
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
*
* Return: 0 on success negative error code on failure.
*/
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
+ struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
int rc;
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+ cxl_nvb = cxl_find_nvdimm_bridge(parent_port);
if (!cxl_nvb)
return -ENODEV;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3c2b6144be23..538ebd5a64fd 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1101,6 +1101,26 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
cxld = cxl_rr->decoder;
+ /*
+ * The number of targets should not exceed the target_count
+ * of the decoder.
+ */
+ if (is_switch_decoder(&cxld->dev)) {
+ struct cxl_switch_decoder *cxlsd;
+
+ cxlsd = to_cxl_switch_decoder(&cxld->dev);
+ if (cxl_rr->nr_targets > cxlsd->nr_targets) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
+ dev_name(port->uport_dev), dev_name(&port->dev),
+ dev_name(&cxld->dev), dev_name(&cxlmd->dev),
+ dev_name(&cxled->cxld.dev), pos,
+ cxlsd->nr_targets);
+ rc = -ENXIO;
+ goto out_erase;
+ }
+ }
+
rc = cxl_rr_ep_add(cxl_rr, cxled);
if (rc) {
dev_dbg(&cxlr->dev,
@@ -1210,6 +1230,50 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled,
return 0;
}
+static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ unsigned int interleave_mask;
+ u8 eiw;
+ u16 eig;
+ int high_pos, low_pos;
+
+ if (!test_bit(iw, &cxlhdm->iw_cap_mask))
+ return -ENXIO;
+ /*
+ * Per CXL specification r3.1 (8.2.4.20.13 Decoder Protection),
+ * if eiw < 8:
+ * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
+ * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
+ *
+ * When eiw is 0, all bits of HPAOFFSET[51:0] are used and there
+ * are no interleave bits.
+ *
+ * if eiw >= 8:
+ * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
+ * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
+ *
+ * When eiw is 8, all bits of HPAOFFSET[51:0] are used and there
+ * are no interleave bits.
+ */
+ ways_to_eiw(iw, &eiw);
+ if (eiw == 0 || eiw == 8)
+ return 0;
+
+ granularity_to_eig(ig, &eig);
+ if (eiw > 8)
+ high_pos = eiw + eig - 1;
+ else
+ high_pos = eiw + eig + 7;
+ low_pos = eig + 8;
+ interleave_mask = GENMASK(high_pos, low_pos);
+ if (interleave_mask & ~cxlhdm->interleave_mask)
+ return -ENXIO;
+
+ return 0;
+}
+
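
As a standalone check of the eiw/eig arithmetic in check_interleave_cap() above, the fragment below computes the HPA interleave-selector mask for an assumed 4-way, 1024-byte-granularity decode (eiw = 2, eig = 2, since iw = 2^eiw and ig = 2^(eig + 8)); it prints 0xc00, i.e. HPA bits [11:10]:

#include <stdio.h>

#define GENMASK(h, l) (((~0u) >> (31 - (h))) & ((~0u) << (l)))

int main(void)
{
	unsigned int eiw = 2, eig = 2;	/* illustrative values */
	unsigned int high = (eiw > 8) ? eiw + eig - 1 : eiw + eig + 7;
	unsigned int low = eig + 8;

	/* these HPA bits select the target; the decoder must support them */
	printf("interleave mask = %#x\n", GENMASK(high, low));
	return 0;
}
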
static int cxl_port_setup_targets(struct cxl_port *port,
struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
@@ -1360,6 +1424,15 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return -ENXIO;
}
} else {
+ rc = check_interleave_cap(cxld, iw, ig);
+ if (rc) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s iw: %d ig: %d is not supported\n",
+ dev_name(port->uport_dev),
+ dev_name(&port->dev), iw, ig);
+ return rc;
+ }
+
cxld->interleave_ways = iw;
cxld->interleave_granularity = ig;
cxld->hpa_range = (struct range) {
@@ -1796,6 +1869,15 @@ static int cxl_region_attach(struct cxl_region *cxlr,
struct cxl_dport *dport;
int rc = -ENXIO;
+ rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
+ p->interleave_granularity);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
+ dev_name(&cxled->cxld.dev), p->interleave_ways,
+ p->interleave_granularity);
+ return rc;
+ }
+
if (cxled->mode != cxlr->mode) {
dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
@@ -2688,22 +2770,33 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
struct cxl_dpa_to_region_context *ctx = arg;
struct cxl_endpoint_decoder *cxled;
+ struct cxl_region *cxlr;
u64 dpa = ctx->dpa;
if (!is_endpoint_decoder(dev))
return 0;
cxled = to_cxl_endpoint_decoder(dev);
- if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
return 0;
if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
return 0;
- dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
- dev_name(&cxled->cxld.region->dev));
+ /*
+ * Stop the region search (return 1) when an endpoint mapping is
+ * found. The region may not be fully constructed, so the cxlr
+ * offered in the context structure is not guaranteed to be set.
+ */
+ cxlr = cxled->cxld.region;
+ if (cxlr)
+ dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
+ dev_name(&cxlr->dev));
+ else
+ dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
+ dev_name(dev));
- ctx->cxlr = cxled->cxld.region;
+ ctx->cxlr = cxlr;
return 1;
}
@@ -2847,7 +2940,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
* bridge for one device is the same for all.
*/
if (i == 0) {
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
if (!cxl_nvb)
return -ENODEV;
cxlr->cxl_nvb = cxl_nvb;
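
__cxl_dpa_to_region() above is a device_for_each_child()-style callback: returning 0 continues the walk, while a nonzero return stops it and is propagated to the caller, which is why the match returns 1 even when cxled->cxld.region is still NULL. The same convention in isolation — context type and match criterion are hypothetical:

#include <linux/device.h>

struct match_ctx {
	u32 needle;
	struct device *found;
};

static int example_match(struct device *dev, void *data)
{
	struct match_ctx *ctx = data;

	if (dev->id != ctx->needle)
		return 0;	/* 0: keep walking the children */

	ctx->found = dev;
	return 1;		/* nonzero: stop, propagated to the caller */
}

/* caller: device_for_each_child(parent, &ctx, example_match); */
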
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 603c0120cff8..a6613a6f8923 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -47,6 +47,8 @@ extern const struct nvdimm_security_ops *cxl_security_ops;
#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
+#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
#define CXL_HDM_DECODER_ENABLE BIT(1)
#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
@@ -855,8 +857,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 19aba81cdf13..af8169ccdbc0 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -395,9 +395,9 @@ enum cxl_devtype {
/**
* struct cxl_dpa_perf - DPA performance property entry
- * @dpa_range - range for DPA address
- * @coord - QoS performance data (i.e. latency, bandwidth)
- * @qos_class - QoS Class cookies
+ * @dpa_range: range for DPA address
+ * @coord: QoS performance data (i.e. latency, bandwidth)
+ * @qos_class: QoS Class cookies
*/
struct cxl_dpa_perf {
struct range dpa_range;
@@ -464,13 +464,14 @@ struct cxl_dev_state {
* @active_persistent_bytes: sum of hard + soft persistent
* @next_volatile_bytes: volatile capacity change pending device reset
* @next_persistent_bytes: persistent capacity change pending device reset
+ * @ram_perf: performance data entry matched to RAM partition
+ * @pmem_perf: performance data entry matched to PMEM partition
* @event: event log driver state
* @poison: poison driver state info
* @security: security driver state info
* @fw: firmware upload / activation state
+ * @mbox_wait: RCU wait for mbox send completion
* @mbox_send: @dev specific transport for transmitting mailbox commands
- * @ram_perf: performance data entry matched to RAM partition
- * @pmem_perf: performance data entry matched to PMEM partition
*
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
* details on capacity parameters.
@@ -851,11 +852,21 @@ static inline void cxl_mem_active_dec(void)
int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd);
+/**
+ * struct cxl_hdm - HDM Decoder registers and cached / decoded capabilities
+ * @regs: mapped registers, see devm_cxl_setup_hdm()
+ * @decoder_count: number of decoders for this port
+ * @target_count: for switch decoders, max downstream port targets
+ * @interleave_mask: interleave granularity capability, see check_interleave_cap()
+ * @iw_cap_mask: bitmask of supported interleave ways, see check_interleave_cap()
+ * @port: mapped cxl_port, see devm_cxl_setup_hdm()
+ */
struct cxl_hdm {
struct cxl_component_regs regs;
unsigned int decoder_count;
unsigned int target_count;
unsigned int interleave_mask;
+ unsigned long iw_cap_mask;
struct cxl_port *port;
};
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 0c79d9ce877c..2f1b49bfe162 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,6 +152,15 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
+ if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+ rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
+ if (rc) {
+ if (rc == -ENODEV)
+ dev_info(dev, "PMEM disabled by platform\n");
+ return rc;
+ }
+ }
+
if (dport->rch)
endpoint_parent = parent_port->uport_dev;
else
@@ -174,14 +183,6 @@ unlock:
if (rc)
return rc;
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
- rc = devm_cxl_add_nvdimm(cxlmd);
- if (rc == -ENODEV)
- dev_info(dev, "PMEM disabled by platform\n");
- else
- return rc;
- }
-
/*
* The kernel may be operating out of CXL memory on this device,
* there is no spec defined way to determine whether this device
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 002a5ec80620..9fc99cfbef08 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -394,7 +394,7 @@ config LS2X_APB_DMA
config MCF_EDMA
tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs"
- depends on M5441x || COMPILE_TEST
+ depends on M5441x || (COMPILE_TEST && FSL_EDMA=n)
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 8dc029c86551..fc049c9c9892 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -611,11 +611,13 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
spin_unlock(&irq_entry->list_lock);
- list_for_each_entry(desc, &flist, list) {
+ list_for_each_entry_safe(desc, n, &flist, list) {
/*
* Check against the original status as ABORT is software defined
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
+ list_del(&desc->list);
+
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true);
continue;
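
The switch to list_for_each_entry_safe() plus an up-front list_del() matters here because the completion path may free the descriptor; the _safe variant caches the next node before the loop body runs, so the iteration survives the removal. The same shape in isolation, with a hypothetical item type:

#include <linux/list.h>
#include <linux/slab.h>

struct example_item {
	struct list_head list;
};

static void example_drain(struct list_head *head)
{
	struct example_item *it, *n;

	list_for_each_entry_safe(it, n, head, list) {
		list_del(&it->list);	/* unlink before the node can vanish */
		kfree(it);		/* stands in for the completion/free */
	}
}
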
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 9c364e92cb82..e8f45a7fded4 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -534,18 +534,6 @@ err_out:
return err;
}
-static int ioat_register(struct ioatdma_device *ioat_dma)
-{
- int err = dma_async_device_register(&ioat_dma->dma_dev);
-
- if (err) {
- ioat_disable_interrupts(ioat_dma);
- dma_pool_destroy(ioat_dma->completion_pool);
- }
-
- return err;
-}
-
static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
{
struct dma_device *dma = &ioat_dma->dma_dev;
@@ -1181,9 +1169,9 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
}
- err = ioat_register(ioat_dma);
+ err = dma_async_device_register(&ioat_dma->dma_dev);
if (err)
- return err;
+ goto err_disable_interrupts;
ioat_kobject_add(ioat_dma, &ioat_ktype);
@@ -1192,20 +1180,29 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
/* disable relaxed ordering */
err = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &val16);
- if (err)
- return pcibios_err_to_errno(err);
+ if (err) {
+ err = pcibios_err_to_errno(err);
+ goto err_disable_interrupts;
+ }
/* clear relaxed ordering enable */
val16 &= ~PCI_EXP_DEVCTL_RELAX_EN;
err = pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, val16);
- if (err)
- return pcibios_err_to_errno(err);
+ if (err) {
+ err = pcibios_err_to_errno(err);
+ goto err_disable_interrupts;
+ }
if (ioat_dma->cap & IOAT_CAP_DPS)
writeb(ioat_pending_level + 1,
ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET);
return 0;
+
+err_disable_interrupts:
+ ioat_disable_interrupts(ioat_dma);
+ dma_pool_destroy(ioat_dma->completion_pool);
+ return err;
}
static void ioat_shutdown(struct pci_dev *pdev)
@@ -1350,6 +1347,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
void __iomem * const *iomap;
struct device *dev = &pdev->dev;
struct ioatdma_device *device;
+ unsigned int i;
+ u8 version;
int err;
err = pcim_enable_device(pdev);
@@ -1363,6 +1362,10 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!iomap)
return -ENOMEM;
+ version = readb(iomap[IOAT_MMIO_BAR] + IOAT_VER_OFFSET);
+ if (version < IOAT_VER_3_0)
+ return -ENODEV;
+
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err)
return err;
@@ -1373,17 +1376,18 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_master(pdev);
pci_set_drvdata(pdev, device);
- device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+ device->version = version;
if (device->version >= IOAT_VER_3_4)
ioat_dca_enabled = 0;
- if (device->version >= IOAT_VER_3_0) {
- if (is_skx_ioat(pdev))
- device->version = IOAT_VER_3_2;
- err = ioat3_dma_probe(device, ioat_dca_enabled);
- } else
- return -ENODEV;
+ if (is_skx_ioat(pdev))
+ device->version = IOAT_VER_3_2;
+
+ err = ioat3_dma_probe(device, ioat_dca_enabled);
if (err) {
+ for (i = 0; i < IOAT_MAX_CHANS; i++)
+ kfree(device->idx[i]);
+ kfree(device);
dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
return -ENODEV;
}
@@ -1445,6 +1449,7 @@ module_init(ioat_init_module);
static void __exit ioat_exit_module(void)
{
pci_unregister_driver(&ioat_pci_driver);
+ kmem_cache_destroy(ioat_sed_cache);
kmem_cache_destroy(ioat_cache);
}
module_exit(ioat_exit_module);
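
With ioat_register() folded into ioat3_dma_probe(), all late failures funnel through a single unwind label instead of duplicating the cleanup. A generic sketch of that goto-unwind convention — every step/undo function below is a placeholder:

static int step_a(void), step_b(void), step_c(void);	/* hypothetical */
static void undo_a(void), undo_b(void);			/* hypothetical */

static int example_probe(void)
{
	int err;

	err = step_a();
	if (err)
		return err;

	err = step_b();
	if (err)
		goto err_undo_a;

	err = step_c();
	if (err)
		goto err_undo_b;

	return 0;

err_undo_b:
	undo_b();
err_undo_a:
	undo_a();
	return err;
}
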
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
index c9b93055dc9d..f0a399cf45b2 100644
--- a/drivers/dma/ti/k3-udma-glue.c
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -200,12 +200,9 @@ of_k3_udma_glue_parse_chn_by_id(struct device_node *udmax_np, struct k3_udma_glu
ret = of_k3_udma_glue_parse(udmax_np, common);
if (ret)
- goto out_put_spec;
+ return ret;
ret = of_k3_udma_glue_parse_chn_common(common, thread_id, tx_chn);
-
-out_put_spec:
- of_node_put(udmax_np);
return ret;
}
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index e143a7330816..718842fdaf98 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -885,11 +885,11 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
u32 st;
bool repeat_tx;
+ spin_lock(&xchan->vchan.lock);
+
if (xchan->stop_requested)
complete(&xchan->last_interrupt);
- spin_lock(&xchan->vchan.lock);
-
/* get submitted request */
vd = vchan_next_desc(&xchan->vchan);
if (!vd)
diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c
index 0d139e4de37c..8a347b938406 100644
--- a/drivers/firmware/cirrus/cs_dsp.c
+++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -1107,9 +1107,16 @@ struct cs_dsp_coeff_parsed_coeff {
int len;
};
-static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str)
+static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, unsigned int avail,
+ const u8 **str)
{
- int length;
+ int length, total_field_len;
+
+ /* String fields are at least one __le32 */
+ if (sizeof(__le32) > avail) {
+ *pos = NULL;
+ return 0;
+ }
switch (bytes) {
case 1:
@@ -1122,10 +1129,16 @@ static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str)
return 0;
}
+ total_field_len = ((length + bytes) + 3) & ~0x03;
+ if ((unsigned int)total_field_len > avail) {
+ *pos = NULL;
+ return 0;
+ }
+
if (str)
*str = *pos + bytes;
- *pos += ((length + bytes) + 3) & ~0x03;
+ *pos += total_field_len;
return length;
}
@@ -1150,71 +1163,134 @@ static int cs_dsp_coeff_parse_int(int bytes, const u8 **pos)
return val;
}
-static inline void cs_dsp_coeff_parse_alg(struct cs_dsp *dsp, const u8 **data,
- struct cs_dsp_coeff_parsed_alg *blk)
+static int cs_dsp_coeff_parse_alg(struct cs_dsp *dsp,
+ const struct wmfw_region *region,
+ struct cs_dsp_coeff_parsed_alg *blk)
{
const struct wmfw_adsp_alg_data *raw;
+ unsigned int data_len = le32_to_cpu(region->len);
+ unsigned int pos;
+ const u8 *tmp;
+
+ raw = (const struct wmfw_adsp_alg_data *)region->data;
switch (dsp->fw_ver) {
case 0:
case 1:
- raw = (const struct wmfw_adsp_alg_data *)*data;
- *data = raw->data;
+ if (sizeof(*raw) > data_len)
+ return -EOVERFLOW;
blk->id = le32_to_cpu(raw->id);
blk->name = raw->name;
- blk->name_len = strlen(raw->name);
+ blk->name_len = strnlen(raw->name, ARRAY_SIZE(raw->name));
blk->ncoeff = le32_to_cpu(raw->ncoeff);
+
+ pos = sizeof(*raw);
break;
default:
- blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), data);
- blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), data,
+ if (sizeof(raw->id) > data_len)
+ return -EOVERFLOW;
+
+ tmp = region->data;
+ blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), &tmp);
+ pos = tmp - region->data;
+
+ tmp = &region->data[pos];
+ blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos,
&blk->name);
- cs_dsp_coeff_parse_string(sizeof(u16), data, NULL);
- blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), data);
+ if (!tmp)
+ return -EOVERFLOW;
+
+ pos = tmp - region->data;
+ cs_dsp_coeff_parse_string(sizeof(u16), &tmp, data_len - pos, NULL);
+ if (!tmp)
+ return -EOVERFLOW;
+
+ pos = tmp - region->data;
+ if (sizeof(raw->ncoeff) > (data_len - pos))
+ return -EOVERFLOW;
+
+ blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), &tmp);
+ pos += sizeof(raw->ncoeff);
break;
}
+ if ((int)blk->ncoeff < 0)
+ return -EOVERFLOW;
+
cs_dsp_dbg(dsp, "Algorithm ID: %#x\n", blk->id);
cs_dsp_dbg(dsp, "Algorithm name: %.*s\n", blk->name_len, blk->name);
cs_dsp_dbg(dsp, "# of coefficient descriptors: %#x\n", blk->ncoeff);
+
+ return pos;
}
-static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data,
- struct cs_dsp_coeff_parsed_coeff *blk)
+static int cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp,
+ const struct wmfw_region *region,
+ unsigned int pos,
+ struct cs_dsp_coeff_parsed_coeff *blk)
{
const struct wmfw_adsp_coeff_data *raw;
+ unsigned int data_len = le32_to_cpu(region->len);
+ unsigned int blk_len, blk_end_pos;
const u8 *tmp;
- int length;
+
+ raw = (const struct wmfw_adsp_coeff_data *)&region->data[pos];
+ if (sizeof(raw->hdr) > (data_len - pos))
+ return -EOVERFLOW;
+
+ blk_len = le32_to_cpu(raw->hdr.size);
+ if (blk_len > S32_MAX)
+ return -EOVERFLOW;
+
+ if (blk_len > (data_len - pos - sizeof(raw->hdr)))
+ return -EOVERFLOW;
+
+ blk_end_pos = pos + sizeof(raw->hdr) + blk_len;
+
+ blk->offset = le16_to_cpu(raw->hdr.offset);
+ blk->mem_type = le16_to_cpu(raw->hdr.type);
switch (dsp->fw_ver) {
case 0:
case 1:
- raw = (const struct wmfw_adsp_coeff_data *)*data;
- *data = *data + sizeof(raw->hdr) + le32_to_cpu(raw->hdr.size);
+ if (sizeof(*raw) > (data_len - pos))
+ return -EOVERFLOW;
- blk->offset = le16_to_cpu(raw->hdr.offset);
- blk->mem_type = le16_to_cpu(raw->hdr.type);
blk->name = raw->name;
- blk->name_len = strlen(raw->name);
+ blk->name_len = strnlen(raw->name, ARRAY_SIZE(raw->name));
blk->ctl_type = le16_to_cpu(raw->ctl_type);
blk->flags = le16_to_cpu(raw->flags);
blk->len = le32_to_cpu(raw->len);
break;
default:
- tmp = *data;
- blk->offset = cs_dsp_coeff_parse_int(sizeof(raw->hdr.offset), &tmp);
- blk->mem_type = cs_dsp_coeff_parse_int(sizeof(raw->hdr.type), &tmp);
- length = cs_dsp_coeff_parse_int(sizeof(raw->hdr.size), &tmp);
- blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp,
+ pos += sizeof(raw->hdr);
+ tmp = &region->data[pos];
+ blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos,
&blk->name);
- cs_dsp_coeff_parse_string(sizeof(u8), &tmp, NULL);
- cs_dsp_coeff_parse_string(sizeof(u16), &tmp, NULL);
+ if (!tmp)
+ return -EOVERFLOW;
+
+ pos = tmp - region->data;
+ cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, NULL);
+ if (!tmp)
+ return -EOVERFLOW;
+
+ pos = tmp - region->data;
+ cs_dsp_coeff_parse_string(sizeof(u16), &tmp, data_len - pos, NULL);
+ if (!tmp)
+ return -EOVERFLOW;
+
+ pos = tmp - region->data;
+ if (sizeof(raw->ctl_type) + sizeof(raw->flags) + sizeof(raw->len) >
+ (data_len - pos))
+ return -EOVERFLOW;
+
blk->ctl_type = cs_dsp_coeff_parse_int(sizeof(raw->ctl_type), &tmp);
+ pos += sizeof(raw->ctl_type);
blk->flags = cs_dsp_coeff_parse_int(sizeof(raw->flags), &tmp);
+ pos += sizeof(raw->flags);
blk->len = cs_dsp_coeff_parse_int(sizeof(raw->len), &tmp);
-
- *data = *data + sizeof(raw->hdr) + length;
break;
}
@@ -1224,6 +1300,8 @@ static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data,
cs_dsp_dbg(dsp, "\tCoefficient flags: %#x\n", blk->flags);
cs_dsp_dbg(dsp, "\tALSA control type: %#x\n", blk->ctl_type);
cs_dsp_dbg(dsp, "\tALSA control len: %#x\n", blk->len);
+
+ return blk_end_pos;
}
static int cs_dsp_check_coeff_flags(struct cs_dsp *dsp,
@@ -1247,12 +1325,16 @@ static int cs_dsp_parse_coeff(struct cs_dsp *dsp,
struct cs_dsp_alg_region alg_region = {};
struct cs_dsp_coeff_parsed_alg alg_blk;
struct cs_dsp_coeff_parsed_coeff coeff_blk;
- const u8 *data = region->data;
- int i, ret;
+ int i, pos, ret;
+
+ pos = cs_dsp_coeff_parse_alg(dsp, region, &alg_blk);
+ if (pos < 0)
+ return pos;
- cs_dsp_coeff_parse_alg(dsp, &data, &alg_blk);
for (i = 0; i < alg_blk.ncoeff; i++) {
- cs_dsp_coeff_parse_coeff(dsp, &data, &coeff_blk);
+ pos = cs_dsp_coeff_parse_coeff(dsp, region, pos, &coeff_blk);
+ if (pos < 0)
+ return pos;
switch (coeff_blk.ctl_type) {
case WMFW_CTL_TYPE_BYTES:
@@ -1321,6 +1403,10 @@ static unsigned int cs_dsp_adsp1_parse_sizes(struct cs_dsp *dsp,
const struct wmfw_adsp1_sizes *adsp1_sizes;
adsp1_sizes = (void *)&firmware->data[pos];
+ if (sizeof(*adsp1_sizes) > firmware->size - pos) {
+ cs_dsp_err(dsp, "%s: file truncated\n", file);
+ return 0;
+ }
cs_dsp_dbg(dsp, "%s: %d DM, %d PM, %d ZM\n", file,
le32_to_cpu(adsp1_sizes->dm), le32_to_cpu(adsp1_sizes->pm),
@@ -1337,6 +1423,10 @@ static unsigned int cs_dsp_adsp2_parse_sizes(struct cs_dsp *dsp,
const struct wmfw_adsp2_sizes *adsp2_sizes;
adsp2_sizes = (void *)&firmware->data[pos];
+ if (sizeof(*adsp2_sizes) > firmware->size - pos) {
+ cs_dsp_err(dsp, "%s: file truncated\n", file);
+ return 0;
+ }
cs_dsp_dbg(dsp, "%s: %d XM, %d YM %d PM, %d ZM\n", file,
le32_to_cpu(adsp2_sizes->xm), le32_to_cpu(adsp2_sizes->ym),
@@ -1376,7 +1466,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware,
struct regmap *regmap = dsp->regmap;
unsigned int pos = 0;
const struct wmfw_header *header;
- const struct wmfw_adsp1_sizes *adsp1_sizes;
const struct wmfw_footer *footer;
const struct wmfw_region *region;
const struct cs_dsp_region *mem;
@@ -1392,10 +1481,8 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware,
ret = -EINVAL;
- pos = sizeof(*header) + sizeof(*adsp1_sizes) + sizeof(*footer);
- if (pos >= firmware->size) {
- cs_dsp_err(dsp, "%s: file too short, %zu bytes\n",
- file, firmware->size);
+ if (sizeof(*header) >= firmware->size) {
+ ret = -EOVERFLOW;
goto out_fw;
}
@@ -1423,22 +1510,36 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware,
pos = sizeof(*header);
pos = dsp->ops->parse_sizes(dsp, file, pos, firmware);
+ if ((pos == 0) || (sizeof(*footer) > firmware->size - pos)) {
+ ret = -EOVERFLOW;
+ goto out_fw;
+ }
footer = (void *)&firmware->data[pos];
pos += sizeof(*footer);
if (le32_to_cpu(header->len) != pos) {
- cs_dsp_err(dsp, "%s: unexpected header length %d\n",
- file, le32_to_cpu(header->len));
+ ret = -EOVERFLOW;
goto out_fw;
}
cs_dsp_dbg(dsp, "%s: timestamp %llu\n", file,
le64_to_cpu(footer->timestamp));
- while (pos < firmware->size &&
- sizeof(*region) < firmware->size - pos) {
+ while (pos < firmware->size) {
+ /* Is there enough data for a complete block header? */
+ if (sizeof(*region) > firmware->size - pos) {
+ ret = -EOVERFLOW;
+ goto out_fw;
+ }
+
region = (void *)&(firmware->data[pos]);
+
+ if (le32_to_cpu(region->len) > firmware->size - pos - sizeof(*region)) {
+ ret = -EOVERFLOW;
+ goto out_fw;
+ }
+
region_name = "Unknown";
reg = 0;
text = NULL;
@@ -1495,16 +1596,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware,
regions, le32_to_cpu(region->len), offset,
region_name);
- if (le32_to_cpu(region->len) >
- firmware->size - pos - sizeof(*region)) {
- cs_dsp_err(dsp,
- "%s.%d: %s region len %d bytes exceeds file length %zu\n",
- file, regions, region_name,
- le32_to_cpu(region->len), firmware->size);
- ret = -EINVAL;
- goto out_fw;
- }
-
if (text) {
memcpy(text, region->data, le32_to_cpu(region->len));
cs_dsp_info(dsp, "%s: %s\n", file, text);
@@ -1555,6 +1646,9 @@ out_fw:
cs_dsp_buf_free(&buf_list);
kfree(text);
+ if (ret == -EOVERFLOW)
+ cs_dsp_err(dsp, "%s: file content overflows file data\n", file);
+
return ret;
}
@@ -2122,10 +2216,20 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware
pos = le32_to_cpu(hdr->len);
blocks = 0;
- while (pos < firmware->size &&
- sizeof(*blk) < firmware->size - pos) {
+ while (pos < firmware->size) {
+ /* Is there enough data for a complete block header? */
+ if (sizeof(*blk) > firmware->size - pos) {
+ ret = -EOVERFLOW;
+ goto out_fw;
+ }
+
blk = (void *)(&firmware->data[pos]);
+ if (le32_to_cpu(blk->len) > firmware->size - pos - sizeof(*blk)) {
+ ret = -EOVERFLOW;
+ goto out_fw;
+ }
+
type = le16_to_cpu(blk->type);
offset = le16_to_cpu(blk->offset);
version = le32_to_cpu(blk->ver) >> 8;
@@ -2222,17 +2326,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware
}
if (reg) {
- if (le32_to_cpu(blk->len) >
- firmware->size - pos - sizeof(*blk)) {
- cs_dsp_err(dsp,
- "%s.%d: %s region len %d bytes exceeds file length %zu\n",
- file, blocks, region_name,
- le32_to_cpu(blk->len),
- firmware->size);
- ret = -EINVAL;
- goto out_fw;
- }
-
buf = cs_dsp_buf_alloc(blk->data,
le32_to_cpu(blk->len),
&buf_list);
@@ -2272,6 +2365,10 @@ out_fw:
regmap_async_complete(regmap);
cs_dsp_buf_free(&buf_list);
kfree(text);
+
+ if (ret == -EOVERFLOW)
+ cs_dsp_err(dsp, "%s: file content overflows file data\n", file);
+
return ret;
}
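
The reworked wmfw parsers above all follow one contract: take the enclosing region plus a cursor, prove the next read fits in the remaining bytes, and return the advanced cursor or -EOVERFLOW. A condensed sketch of that contract over a hypothetical length-prefixed block (assumes the caller guarantees pos <= data_len):

#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical TLV: a __le32 length followed by that many payload bytes. */
static int example_parse_one(const u8 *data, unsigned int data_len,
			     unsigned int pos)
{
	u32 len;

	if (sizeof(__le32) > data_len - pos)	/* room for the header? */
		return -EOVERFLOW;

	len = le32_to_cpup((const __le32 *)&data[pos]);
	pos += sizeof(__le32);

	if (len > data_len - pos)		/* room for the payload? */
		return -EOVERFLOW;

	/* ...consume data[pos..pos + len)... */
	return pos + len;	/* cursor for the next block */
}
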
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index 3365944f7965..34109fd86c55 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -15,10 +15,6 @@
#include <asm/early_ioremap.h>
#include <asm/efi.h>
-#ifndef __efi_memmap_free
-#define __efi_memmap_free(phys, size, flags) do { } while (0)
-#endif
-
/**
* __efi_memmap_init - Common code for mapping the EFI memory map
* @data: EFI memory map data
@@ -51,11 +47,6 @@ int __init __efi_memmap_init(struct efi_memory_map_data *data)
return -ENOMEM;
}
- if (efi.memmap.flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB))
- __efi_memmap_free(efi.memmap.phys_map,
- efi.memmap.desc_size * efi.memmap.nr_map,
- efi.memmap.flags);
-
map.phys_map = data->phys_map;
map.nr_map = data->size / data->desc_size;
map.map_end = map.map + data->size;
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index d9629ff87861..2328ca58bba6 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -497,10 +497,12 @@ int psci_cpu_suspend_enter(u32 state)
static int psci_system_suspend(unsigned long unused)
{
+ int err;
phys_addr_t pa_cpu_resume = __pa_symbol(cpu_resume);
- return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND),
+ err = invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND),
pa_cpu_resume, 0, 0);
+ return psci_to_linux_errno(err);
}
static int psci_system_suspend_enter(suspend_state_t state)
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 880ffcb50088..921f61507ae8 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -101,8 +101,10 @@ static __init struct device *sysfb_parent_dev(const struct screen_info *si)
if (IS_ERR(pdev)) {
return ERR_CAST(pdev);
} else if (pdev) {
- if (!sysfb_pci_dev_is_enabled(pdev))
+ if (!sysfb_pci_dev_is_enabled(pdev)) {
+ pci_dev_put(pdev);
return ERR_PTR(-ENODEV);
+ }
return &pdev->dev;
}
@@ -137,7 +139,7 @@ static __init int sysfb_init(void)
if (compatible) {
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
- goto unlock_mutex;
+ goto put_device;
}
/* if the FB is incompatible, create a legacy framebuffer device */
@@ -155,7 +157,7 @@ static __init int sysfb_init(void)
pd = platform_device_alloc(name, 0);
if (!pd) {
ret = -ENOMEM;
- goto unlock_mutex;
+ goto put_device;
}
pd->dev.parent = parent;
@@ -170,9 +172,11 @@ static __init int sysfb_init(void)
if (ret)
goto err;
- goto unlock_mutex;
+ goto put_device;
err:
platform_device_put(pd);
+put_device:
+ put_device(parent);
unlock_mutex:
mutex_unlock(&disable_lock);
return ret;
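
Both sysfb fixes are reference-count pairings: every pointer obtained from a get-style lookup must see exactly one put on every exit path, error paths included. A self-contained sketch of the pattern — the vendor/device IDs are purely illustrative:

#include <linux/pci.h>

static int example_use_pdev(void)
{
	struct pci_dev *pdev;
	int ret = 0;

	pdev = pci_get_device(0x8086, 0x1234, NULL);	/* takes a reference */
	if (!pdev)
		return -ENODEV;

	if (!pci_is_enabled(pdev)) {
		ret = -ENODEV;
		goto put;	/* the error path still drops the reference */
	}

	/* ...use pdev... */
put:
	pci_dev_put(pdev);
	return ret;
}
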
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index bb499e362912..1d0175d6350b 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -225,6 +225,11 @@ static int davinci_gpio_probe(struct platform_device *pdev)
else
nirq = DIV_ROUND_UP(ngpio, 16);
+ if (nirq > MAX_INT_PER_BANK) {
+ dev_err(dev, "Too many IRQs!\n");
+ return -EINVAL;
+ }
+
chips = devm_kzalloc(dev, sizeof(*chips), GFP_KERNEL);
if (!chips)
return -ENOMEM;
diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c
index c693fe05d50f..f2e911a3d2ca 100644
--- a/drivers/gpio/gpio-graniterapids.c
+++ b/drivers/gpio/gpio-graniterapids.c
@@ -296,6 +296,8 @@ static int gnr_gpio_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
+ raw_spin_lock_init(&priv->lock);
+
regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(regs))
return PTR_ERR(regs);
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index 71e1af7c2184..d89e78f0ead3 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -619,8 +619,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
ret = gpiochip_get_ngpios(gc, dev);
if (ret)
gc->ngpio = gc->bgpio_bits;
- else
- gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8));
ret = bgpio_setup_io(gc, dat, set, clr, flags);
if (ret)
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 77a2812f2974..732a6964748c 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -758,6 +758,8 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
int level;
if (chip->driver_data & PCA_PCAL) {
+ guard(mutex)(&chip->i2c_lock);
+
/* Enable latch on interrupt-enabled inputs */
pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
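
guard(mutex)(...) comes from <linux/cleanup.h>: it acquires the mutex and arranges for the unlock to run automatically when the enclosing scope is left, so early returns cannot leak the lock. A minimal sketch:

#include <linux/cleanup.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);
static int example_state;

static int example_update(int v)
{
	guard(mutex)(&example_lock);	/* unlocked automatically on return */

	if (v < 0)
		return -EINVAL;		/* no explicit mutex_unlock() needed */

	example_state = v;
	return 0;
}
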
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 9dad67ea2597..5639abce6ec5 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -89,6 +89,10 @@ struct linehandle_state {
GPIOHANDLE_REQUEST_OPEN_DRAIN | \
GPIOHANDLE_REQUEST_OPEN_SOURCE)
+#define GPIOHANDLE_REQUEST_DIRECTION_FLAGS \
+ (GPIOHANDLE_REQUEST_INPUT | \
+ GPIOHANDLE_REQUEST_OUTPUT)
+
static int linehandle_validate_flags(u32 flags)
{
/* Return an error if an unknown flag is set */
@@ -169,21 +173,21 @@ static long linehandle_set_config(struct linehandle_state *lh,
if (ret)
return ret;
+ /* Lines must be reconfigured explicitly as input or output. */
+ if (!(lflags & GPIOHANDLE_REQUEST_DIRECTION_FLAGS))
+ return -EINVAL;
+
for (i = 0; i < lh->num_descs; i++) {
desc = lh->descs[i];
- linehandle_flags_to_desc_flags(gcnf.flags, &desc->flags);
+ linehandle_flags_to_desc_flags(lflags, &desc->flags);
- /*
- * Lines have to be requested explicitly for input
- * or output, else the line will be treated "as is".
- */
if (lflags & GPIOHANDLE_REQUEST_OUTPUT) {
int val = !!gcnf.default_values[i];
ret = gpiod_direction_output(desc, val);
if (ret)
return ret;
- } else if (lflags & GPIOHANDLE_REQUEST_INPUT) {
+ } else {
ret = gpiod_direction_input(desc);
if (ret)
return ret;
@@ -1530,12 +1534,14 @@ static long linereq_set_config(struct linereq *lr, void __user *ip)
line = &lr->lines[i];
desc = lr->lines[i].desc;
flags = gpio_v2_line_config_flags(&lc, i);
- gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags);
- edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS;
/*
- * Lines have to be requested explicitly for input
- * or output, else the line will be treated "as is".
+ * Lines not explicitly reconfigured as input or output
+ * are left unchanged.
*/
+ if (!(flags & GPIO_V2_LINE_DIRECTION_FLAGS))
+ continue;
+ gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags);
+ edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS;
if (flags & GPIO_V2_LINE_FLAG_OUTPUT) {
int val = gpio_v2_line_config_output_value(&lc, i);
@@ -1543,7 +1549,7 @@ static long linereq_set_config(struct linereq *lr, void __user *ip)
ret = gpiod_direction_output(desc, val);
if (ret)
return ret;
- } else if (flags & GPIO_V2_LINE_FLAG_INPUT) {
+ } else {
ret = gpiod_direction_input(desc);
if (ret)
return ret;
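
Both the v1 and v2 set_config paths now insist that a reconfigure names a direction before any flags are applied, instead of silently treating the line "as is". Sketched against the v1 uAPI flags, userspace-style and purely illustrative:

#include <linux/gpio.h>
#include <errno.h>

static int example_validate_direction(unsigned int flags)
{
	/* a reconfigure must pick input or output explicitly */
	if (!(flags & (GPIOHANDLE_REQUEST_INPUT | GPIOHANDLE_REQUEST_OUTPUT)))
		return -EINVAL;
	return 0;
}
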
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d75f6ee37028..89d5e64cf68b 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -203,6 +203,24 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
*/
{ "qi,lb60", "rb-gpios", true },
#endif
+#if IS_ENABLED(CONFIG_PCI_LANTIQ)
+ /*
+ * According to the PCI specification, the RST# pin is an
+ * active-low signal. However, most of the device trees that
+ * have been widely used for a long time incorrectly describe
+ * the reset GPIO as active-high and also use the wrong name
+ * for the property.
+ */
+ { "lantiq,pci-xway", "gpio-reset", false },
+#endif
+#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005)
+ /*
+ * DTS for Nokia N900 incorrectly specified "active high"
+ * polarity for the reset line, while the chip actually
+ * treats it as "active low".
+ */
+ { "ti,tsc2005", "reset-gpios", false },
+#endif
};
unsigned int i;
@@ -504,9 +522,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
{ "reset", "reset-n-io", "marvell,nfc-uart" },
{ "reset", "reset-n-io", "mrvl,nfc-uart" },
#endif
-#if !IS_ENABLED(CONFIG_PCI_LANTIQ)
+#if IS_ENABLED(CONFIG_PCI_LANTIQ)
/* MIPS Lantiq PCI */
- { "reset", "gpios-reset", "lantiq,pci-xway" },
+ { "reset", "gpio-reset", "lantiq,pci-xway" },
#endif
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 108003bdf1e9..2e13c7c4b2b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -400,7 +400,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
+ *vram_width = mem_channel_number * 16;
break;
default:
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec888fc6ead8..916b6b8cf7d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1093,6 +1093,21 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
unsigned int i;
int r;
+ /*
+ * We can't use gang submit on with reserved VMIDs when the VM changes
+ * can't be invalidated by more than one engine at the same time.
+ */
+ if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
+ for (i = 0; i < p->gang_size; ++i) {
+ struct drm_sched_entity *entity = p->entities[i];
+ struct drm_gpu_scheduler *sched = entity->rq->sched;
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched);
+
+ if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
+ return -EINVAL;
+ }
+ }
+
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 932dc93b2e63..33f791d92ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5220,11 +5220,14 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
dev_info(adev->dev, "GPU mode1 reset\n");
+ /* Cache the state before bus master disable. The saved config space
+ * values are used in other cases like restore after mode-2 reset.
+ */
+ amdgpu_device_cache_pci_state(adev->pdev);
+
/* disable BM */
pci_clear_master(adev->pdev);
- amdgpu_device_cache_pci_state(adev->pdev);
-
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
dev_info(adev->dev, "GPU smu mode1 reset\n");
ret = amdgpu_dpm_mode1_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 055ba2ea4c12..662d0f28f358 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -41,8 +41,6 @@
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
-#include <linux/pm_runtime.h>
-#include "amdgpu_trace.h"
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- int r;
if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(1, __func__);
- if (r < 0)
- goto out;
-
return 0;
-
-out:
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(0, __func__);
- return r;
-}
-
-/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
- *
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
- *
- * Called when an attachment is removed from the DMA-buf.
- */
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
-{
- struct drm_gem_object *obj = dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(0, __func__);
}
/**
@@ -267,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
- .detach = amdgpu_dma_buf_detach,
.pin = amdgpu_dma_buf_pin,
.unpin = amdgpu_dma_buf_unpin,
.map_dma_buf = amdgpu_dma_buf_map,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 10832b470448..bc3ac73b6b8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(1, __func__);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
struct dma_fence *old;
@@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
dma_fence_put(fence);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(0, __func__);
} while (last_seq != seq);
return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index be4629cdac04..08b9dfb65335 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
unsigned int ndw;
- signed long r;
+ int r;
uint32_t seq;
- if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
- !down_read_trylock(&adev->reset_domain->sem)) {
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return 0;
+ if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
if (adev->gmc.flush_tlb_needs_extra_type_2)
adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
2, all_hub,
@@ -703,43 +708,40 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
flush_type, all_hub,
inst);
- return 0;
- }
+ r = 0;
+ } else {
+ /* 2 dwords flush + 8 dwords fence */
+ ndw = kiq->pmf->invalidate_tlbs_size + 8;
- /* 2 dwords flush + 8 dwords fence */
- ndw = kiq->pmf->invalidate_tlbs_size + 8;
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ ndw += kiq->pmf->invalidate_tlbs_size;
- if (adev->gmc.flush_tlb_needs_extra_type_2)
- ndw += kiq->pmf->invalidate_tlbs_size;
+ if (adev->gmc.flush_tlb_needs_extra_type_0)
+ ndw += kiq->pmf->invalidate_tlbs_size;
- if (adev->gmc.flush_tlb_needs_extra_type_0)
- ndw += kiq->pmf->invalidate_tlbs_size;
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ amdgpu_ring_alloc(ring, ndw);
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
- spin_lock(&adev->gfx.kiq[inst].ring_lock);
- amdgpu_ring_alloc(ring, ndw);
- if (adev->gmc.flush_tlb_needs_extra_type_2)
- kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+ if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
- if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
- kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
- kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
+ amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
- goto error_unlock_reset;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq[inst].ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- r = -ETIME;
- goto error_unlock_reset;
+ if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
+ dev_err(adev->dev, "timeout waiting for kiq fence\n");
+ r = -ETIME;
+ }
}
- r = 0;
error_unlock_reset:
up_read(&adev->reset_domain->sem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 3d7fcdeaf8cf..e8f6e4dbc5a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -406,7 +406,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle)
goto error;
- if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
+ if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
@@ -456,6 +456,19 @@ error:
return r;
}
+/**
+ * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
+ * @vm: the VM to check
+ * @vmhub: the VMHUB which will be used
+ *
+ * Returns: True if the VM will use a reserved VMID.
+ */
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
+{
+ return vm->reserved_vmid[vmhub] ||
+ (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)));
+}
+
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index fa8c42c83d5d..240fa6751260 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -78,6 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4bd4602d11b1..cef9dd0a012b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -640,6 +640,20 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
}
}
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
@@ -699,10 +713,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
dev_warn(psp->adev->dev,
"failed to load ucode %s(0x%X) ",
amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
- dev_warn(psp->adev->dev,
- "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
- psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
- psp->cmd_buf_mem->resp.status);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
/* If any firmware (including CAP) load fails under SRIOV, it should
* return failure to stop the VF from initializing.
* Also return failure in case of timeout
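
Editor's note: psp_err_warn() exists only to keep one known-benign failure out of the log: on PSP 13.0.2, re-sending the register list returns TEE_ERROR_CANCEL because the list is already loaded. A self-contained sketch of that filter shape; the struct and the cmd_id/fw_type values 1 and 2 are simplified stand-ins, not the driver's real encodings:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TEE_ERROR_CANCEL 0xFFFF0002u

struct resp { uint32_t cmd_id, fw_type, status; };

static bool should_warn(const struct resp *r, bool is_psp_13_0_2)
{
	/* The one benign case: reg list already loaded, request cancelled. */
	if (is_psp_13_0_2 && r->cmd_id == 1 && r->fw_type == 2 &&
	    r->status == TEE_ERROR_CANCEL)
		return false;

	return true;
}

int main(void)
{
	struct resp r = { 1, 2, TEE_ERROR_CANCEL };

	printf("%s\n", should_warn(&r, true) ? "warn" : "suppress");
	return 0;
}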
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 7aafeb763e5d..383fce40d4dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
__entry->value)
);
-TRACE_EVENT(amdgpu_runpm_reference_dumps,
- TP_PROTO(uint32_t index, const char *func),
- TP_ARGS(index, func),
- TP_STRUCT__entry(
- __field(uint32_t, index)
- __string(func, func)
- ),
- TP_fast_assign(
- __entry->index = index;
- __assign_str(func);
- ),
- TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
- __entry->index,
- __get_str(func))
-);
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index e30eecd02ae1..fde66225c481 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -3,6 +3,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -314,7 +315,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
return 0;
}
afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
+
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
@@ -368,12 +375,19 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct amdgpu_bo *rbo;
+ struct drm_gem_object *obj;
int r;
if (!old_state->fb)
return;
- rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
+ obj = drm_gem_fb_get_obj(old_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(rbo, false);
if (unlikely(r)) {
DRM_ERROR("failed to reserve rbo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 7b16e8cca86a..f5b9f443cfdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -4195,9 +4195,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info)
{
- int i, j, k, counter, xcc_id, active_cu_number = 0;
- u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp;
unsigned disable_masks[4 * 4];
+ bool is_symmetric_cus;
if (!adev || !cu_info)
return -EINVAL;
@@ -4215,6 +4216,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ is_symmetric_cus = true;
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
@@ -4242,6 +4244,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
+ if (i && is_symmetric_cus && prev_counter != counter)
+ is_symmetric_cus = false;
+ prev_counter = counter;
+ }
+ if (is_symmetric_cus) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp);
}
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 0d1407f25005..32d4519541c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -154,18 +154,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
- int ndw = size / 4;
- signed long r;
- union MESAPI__MISC *x_pkt = pkt;
- struct MES_API_STATUS *api_status;
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+ signed long timeout = 3000000; /* 3000 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
- unsigned long flags;
- signed long timeout = 3000000; /* 3000 ms */
+ struct MES_API_STATUS *api_status;
+ union MESAPI__MISC *x_pkt = pkt;
const char *op_str, *misc_op_str;
- u32 fence_offset;
- u64 fence_gpu_addr;
- u64 *fence_ptr;
+ unsigned long flags;
+ u64 status_gpu_addr;
+ u32 status_offset;
+ u64 *status_ptr;
+ signed long r;
int ret;
if (x_pkt->header.opcode >= MES_SCH_API_MAX)
@@ -177,28 +177,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
/* Worst case in SR-IOV: all other 15 VFs time out, so each VF needs about 600ms */
timeout = 15 * 600 * 1000;
}
- BUG_ON(size % 4 != 0);
- ret = amdgpu_device_wb_get(adev, &fence_offset);
+ ret = amdgpu_device_wb_get(adev, &status_offset);
if (ret)
return ret;
- fence_gpu_addr =
- adev->wb.gpu_addr + (fence_offset * 4);
- fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
- *fence_ptr = 0;
+
+ status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+ status_ptr = (u64 *)&adev->wb.wb[status_offset];
+ *status_ptr = 0;
spin_lock_irqsave(&mes->ring_lock, flags);
- if (amdgpu_ring_alloc(ring, ndw)) {
- spin_unlock_irqrestore(&mes->ring_lock, flags);
- amdgpu_device_wb_free(adev, fence_offset);
- return -ENOMEM;
- }
+ r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+ if (r)
+ goto error_unlock_free;
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
- api_status->api_completion_fence_addr = fence_gpu_addr;
+ api_status->api_completion_fence_addr = status_gpu_addr;
api_status->api_completion_fence_value = 1;
- amdgpu_ring_write_multiple(ring, pkt, ndw);
+ amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ ring->fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value =
+ ++ring->fence_drv.sync_seq;
+
+ amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+ sizeof(mes_status_pkt) / 4);
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
@@ -206,15 +216,16 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
if (misc_op_str)
- dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+ misc_op_str);
else if (op_str)
dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
else
- dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+ x_pkt->header.opcode);
- r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout);
- amdgpu_device_wb_free(adev, fence_offset);
- if (r < 1) {
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+ if (r < 1 || !*status_ptr) {
if (misc_op_str)
dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
@@ -229,10 +240,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
while (halt_if_hws_hang)
schedule();
- return -ETIMEDOUT;
+ r = -ETIMEDOUT;
+ goto error_wb_free;
}
+ amdgpu_device_wb_free(adev, status_offset);
return 0;
+
+error_unlock_free:
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+
+error_wb_free:
+ amdgpu_device_wb_free(adev, status_offset);
+ return r;
}
static int convert_to_mes_queue_type(int queue_type)
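
Editor's note: the MES rework replaces the dedicated writeback fence with a trailing QUERY_SCHEDULER_STATUS packet whose completion lands on the ring's own fence, then polls that fence and double-checks the status writeback (r < 1 || !*status_ptr). A standalone sketch of the polling convention the r < 1 test relies on; wait_polling() is illustrative, not a kernel API:

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static long wait_polling(const volatile uint64_t *fence, uint64_t seq,
			 long timeout_us)
{
	while (timeout_us-- > 0) {
		if (*fence >= seq)
			return timeout_us + 1;	/* > 0: signaled in time */
		usleep(1);
	}
	return 0;				/* < 1: timed out */
}

int main(void)
{
	volatile uint64_t fence = 1;	/* pretend MES already signaled */

	printf("r = %ld\n", wait_polling(&fence, 1, 1000));
	return 0;
}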
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 7566973ed8f5..37b5ddd6f13b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -464,8 +464,9 @@ struct psp_gfx_rb_frame
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
- TEE_SUCCESS = 0x00000000,
- TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_CANCEL = 0xFFFF0002,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
index f08a32c18694..40b28298af30 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -32,7 +32,9 @@
#include "mp/mp_14_0_2_sh_mask.h"
MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -66,6 +68,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp)
err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 5fcd4f778dc3..47b8b49da8a7 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -8,7 +8,7 @@ config DRM_AMD_DC
depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
- select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG)
+ select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV))
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f1d67c6f4b98..a622aca8c649 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9169,9 +9169,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
trace_amdgpu_dm_atomic_commit_tail_begin(state);
- if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed)
- dc_allow_idle_optimizations(dm->dc, false);
-
drm_atomic_helper_update_legacy_modeset_state(dev, state);
drm_dp_mst_atomic_wait_for_dependencies(state);
@@ -11184,6 +11181,49 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector,
return ret;
}
+static void parse_edid_displayid_vrr(struct drm_connector *connector,
+ struct edid *edid)
+{
+ u8 *edid_ext = NULL;
+ int i;
+ int j = 0;
+ u16 min_vfreq;
+ u16 max_vfreq;
+
+ if (edid == NULL || edid->extensions == 0)
+ return;
+
+ /* Find DisplayID extension */
+ for (i = 0; i < edid->extensions; i++) {
+ edid_ext = (void *)(edid + (i + 1));
+ if (edid_ext[0] == DISPLAYID_EXT)
+ break;
+ }
+
+ if (edid_ext == NULL)
+ return;
+
+ while (j < EDID_LENGTH) {
+ /* Get dynamic video timing range from DisplayID if available */
+ if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 &&
+ (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) {
+ min_vfreq = edid_ext[j+9];
+ if (edid_ext[j+1] & 7)
+ max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8);
+ else
+ max_vfreq = edid_ext[j+10];
+
+ if (max_vfreq && min_vfreq) {
+ connector->display_info.monitor_range.max_vfreq = max_vfreq;
+ connector->display_info.monitor_range.min_vfreq = min_vfreq;
+
+ return;
+ }
+ }
+ j++;
+ }
+}
+
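
Editor's note: parse_edid_displayid_vrr() scans for a DisplayID block with tag 0x25, revision 0 and payload length 9, then reads the minimum refresh rate from byte 9 and the maximum from byte 10, optionally extended by two high bits of byte 11. A self-contained demo of that byte layout on a synthetic block (the sample bytes are test data, not a real panel dump):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* tag 0x25, rev/flags 0x01, len 9, min 48 Hz, max 0x122 = 290 Hz */
	uint8_t blk[12] = { 0x25, 0x01, 0x09, 0, 0, 0, 0, 0, 0,
			    48, 0x22, 0x01 };
	uint16_t min_vfreq = blk[9];
	uint16_t max_vfreq;

	if (blk[1] & 7)		/* extended range: 10-bit maximum */
		max_vfreq = blk[10] + ((blk[11] & 3) << 8);
	else
		max_vfreq = blk[10];

	printf("vrr range: %u-%u Hz\n", min_vfreq, max_vfreq);
	return 0;
}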
static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
@@ -11305,6 +11345,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (!adev->dm.freesync_module)
goto update;
+ /* Some eDP panels only have the refresh rate range info in DisplayID */
+ if ((connector->display_info.monitor_range.min_vfreq == 0 ||
+ connector->display_info.monitor_range.max_vfreq == 0))
+ parse_edid_displayid_vrr(connector, edid);
+
if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
sink->sink_signal == SIGNAL_TYPE_EDP)) {
bool edid_check_required = false;
@@ -11312,9 +11357,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (is_dp_capable_without_timing_msa(adev->dm.dc,
amdgpu_dm_connector)) {
if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) {
- freesync_capable = true;
amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+ if (amdgpu_dm_connector->max_vfreq -
+ amdgpu_dm_connector->min_vfreq > 10)
+ freesync_capable = true;
} else {
edid_check_required = edid->version > 1 ||
(edid->version == 1 &&
@@ -11440,6 +11487,12 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev)
mutex_unlock(&adev->dm.dc_lock);
}
+static inline void amdgpu_dm_exit_ips_for_hw_access(struct dc *dc)
+{
+ if (dc->ctx->dmub_srv && !dc->ctx->dmub_srv->idle_exit_counter)
+ dc_exit_ips_for_hw_access(dc);
+}
+
void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
u32 value, const char *func_name)
{
@@ -11450,6 +11503,8 @@ void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
return;
}
#endif
+
+ amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
cgs_write_register(ctx->cgs_device, address, value);
trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value);
}
@@ -11473,6 +11528,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return 0;
}
+ amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
+
value = cgs_read_register(ctx->cgs_device, address);
trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 6c84b0fa40f4..0782a34689a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -3364,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&mode_lib->vba.UrgentBurstFactorLumaPre[k],
&mode_lib->vba.UrgentBurstFactorChromaPre[k],
&mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k];
}
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 60f251cf973b..beed7adbbd43 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -177,7 +177,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
- .dram_clock_change_latency_us = 11.72,
+ .dram_clock_change_latency_us = 34.0,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
index e4f333d4fb54..a201dbb743d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -215,7 +215,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
- .dram_clock_change_latency_us = 11.72,
+ .dram_clock_change_latency_us = 34,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index a41812598ce8..8ecc972dbffd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -234,6 +234,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
+ out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0;
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 0f8b3336e26d..cbd1c1f26b7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -294,7 +294,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000;
context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
- if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported)
+ if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
else
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 5295f52e4fc8..dcced89c07b3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1439,3 +1439,75 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
}
}
}
+
+static bool should_avoid_empty_tu(struct pipe_ctx *pipe_ctx)
+{
+ /* Calculate the average pixel count per TU; return true when it falls
+ * under ~2.00 so the caller can avoid generating empty TUs. This is only
+ * required for DPIA tunneling, as empty TUs are legal to generate on
+ * native DP links. Assume TU size 64, as there is currently no scenario
+ * where it's reprogrammed from the HW default.
+ * MTPs have no such limitation, so this does not affect MST use cases.
+ */
+ unsigned int pix_clk_mhz;
+ unsigned int symclk_mhz;
+ unsigned int avg_pix_per_tu_x1000;
+ unsigned int tu_size_bytes = 64;
+ struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+ struct dc_link_settings *link_settings = &pipe_ctx->link_config.dp_link_settings;
+ const struct dc *dc = pipe_ctx->stream->link->dc;
+
+ if (pipe_ctx->stream->link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+ return false;
+
+ // Not necessary for MST configurations
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+ return false;
+
+ pix_clk_mhz = timing->pix_clk_100hz / 10000;
+
+ // If this is true, can't block due to dynamic ODM
+ if (pix_clk_mhz > dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz)
+ return false;
+
+ switch (link_settings->link_rate) {
+ case LINK_RATE_LOW:
+ symclk_mhz = 162;
+ break;
+ case LINK_RATE_HIGH:
+ symclk_mhz = 270;
+ break;
+ case LINK_RATE_HIGH2:
+ symclk_mhz = 540;
+ break;
+ case LINK_RATE_HIGH3:
+ symclk_mhz = 810;
+ break;
+ default:
+ // We shouldn't be tunneling any other rates; something is wrong
+ ASSERT(0);
+ return false;
+ }
+
+ avg_pix_per_tu_x1000 = (1000 * pix_clk_mhz * tu_size_bytes)
+ / (symclk_mhz * link_settings->lane_count);
+
+ // Add small empirically-decided margin to account for potential jitter
+ return (avg_pix_per_tu_x1000 < 2020);
+}
+
+bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+
+ if (!is_h_timing_divisible_by_2(pipe_ctx->stream))
+ return false;
+
+ if (should_avoid_empty_tu(pipe_ctx))
+ return false;
+
+ if (dc_is_dp_signal(pipe_ctx->stream->signal) && !dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) &&
+ dc->debug.enable_dp_dig_pixel_rate_div_policy)
+ return true;
+
+ return false;
+}
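
Editor's note: the empty-TU check boils down to one fixed-point division: average pixels per TU = pix_clk * tu_size / (symclk * lanes), scaled by 1000 to keep integer math, with 2020 as the ~2.00 threshold plus jitter margin. A worked example with hypothetical numbers (a 65 MHz pixel clock tunneled over 4 lanes of HBR2):

#include <stdio.h>

int main(void)
{
	unsigned int pix_clk_mhz = 65, symclk_mhz = 540, lane_count = 4;
	unsigned int tu_size_bytes = 64;
	unsigned int avg_pix_per_tu_x1000 =
		(1000 * pix_clk_mhz * tu_size_bytes) /
		(symclk_mhz * lane_count);

	/* 4160000 / 2160 = 1925, i.e. ~1.93 pixels per TU: avoid */
	printf("avg x1000 = %u, avoid = %s\n", avg_pix_per_tu_x1000,
	       avg_pix_per_tu_x1000 < 2020 ? "yes" : "no");
	return 0;
}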
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index a731c8880d60..f0ea7d1511ae 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -95,4 +95,6 @@ void dcn35_set_static_screen_control(struct pipe_ctx **pipe_ctx,
void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
int num_pipes, uint32_t v_total_min, uint32_t v_total_max);
+bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
#endif /* __DC_HWSS_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index df3bf77f3fb4..199781233fd5 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -158,7 +158,7 @@ static const struct hwseq_private_funcs dcn35_private_funcs = {
.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
- .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+ .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy,
.dsc_pg_control = dcn35_dsc_pg_control,
.dsc_pg_status = dcn32_dsc_pg_status,
.enable_plane = dcn35_enable_plane,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index a01d0842bf8e..d487dfcd219b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -1590,9 +1590,17 @@ static bool retrieve_link_cap(struct dc_link *link)
return false;
}
- if (dp_is_lttpr_present(link))
+ if (dp_is_lttpr_present(link)) {
configure_lttpr_mode_transparent(link);
+ // Echo TOTAL_LTTPR_CNT back downstream
+ core_link_write_dpcd(
+ link,
+ DP_TOTAL_LTTPR_CNT,
+ &link->dpcd_caps.lttpr_caps.phy_repeater_cnt,
+ sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt));
+ }
+
/* Read DP tunneling information. */
status = dpcd_get_tunneling_device_data(link);
diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
index 914f28e9f224..aee5170f5fb2 100644
--- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h
+++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
@@ -177,4 +177,9 @@ enum dpcd_psr_sink_states {
#define DP_SINK_PR_PIXEL_DEVIATION_PER_LINE 0x379
#define DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE 0x37A
+/* Remove once drm_dp_helper.h is updated upstream */
+#ifndef DP_TOTAL_LTTPR_CNT
+#define DP_TOTAL_LTTPR_CNT 0xF000A /* 2.1 */
+#endif
+
#endif /* __DAL_DPCD_DEFS_H__ */
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 571691837200..09cbc3afd6d8 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -734,7 +734,7 @@ struct atom_gpio_pin_lut_v2_1
{
struct atom_common_table_header table_header;
/* the real number of entries included in the structure is calculated as (whole structure size - header size) / sizeof(atom_gpio_pin_assignment) */
- struct atom_gpio_pin_assignment gpio_pin[8];
+ struct atom_gpio_pin_assignment gpio_pin[];
};
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 6bb42d04b247..e8b6989a40f3 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -164,6 +164,8 @@ static void sumo_construct_vid_mapping_table(struct amdgpu_device *adev,
for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) {
if (table[i].ulSupportedSCLK != 0) {
+ if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES)
+ continue;
vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit =
table[i].usVoltageID;
vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit =
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7789b313285c..e1796ecf9c05 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -324,6 +324,18 @@ static int smu_dpm_set_umsch_mm_enable(struct smu_context *smu,
return ret;
}
+static int smu_set_mall_enable(struct smu_context *smu)
+{
+ int ret = 0;
+
+ if (!smu->ppt_funcs->set_mall_enable)
+ return 0;
+
+ ret = smu->ppt_funcs->set_mall_enable(smu);
+
+ return ret;
+}
+
/**
* smu_dpm_set_power_gate - power gate/ungate the specific IP block
*
@@ -1791,6 +1803,7 @@ static int smu_hw_init(void *handle)
smu_dpm_set_jpeg_enable(smu, true);
smu_dpm_set_vpe_enable(smu, true);
smu_dpm_set_umsch_mm_enable(smu, true);
+ smu_set_mall_enable(smu);
smu_set_gfx_cgpg(smu, true);
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 0917dec8efe3..64ccdb5f14ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1395,6 +1395,11 @@ struct pptable_funcs {
int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable);
/**
+ * @set_mall_enable: Init MALL power gating control.
+ */
+ int (*set_mall_enable)(struct smu_context *smu);
+
+ /**
* @notify_rlc_state: Notify RLC power state to SMU.
*/
int (*notify_rlc_state)(struct smu_context *smu, bool en);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
index c4dc5881d8df..e7f5ef49049f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
@@ -106,8 +106,8 @@
#define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA
#define PPSMC_MSG_SetSoftMaxVpe 0x36 ///<
#define PPSMC_MSG_SetSoftMinVpe 0x37 ///<
-#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache
-#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache
+#define PPSMC_MSG_MALLPowerController 0x38 ///< Set MALL control
+#define PPSMC_MSG_MALLPowerState 0x39 ///< Enter/Exit MALL PG
#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages
/** @}*/
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index c48214e3dc8e..2e32b085824a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -272,7 +272,9 @@
__SMU_DUMMY_MAP(SetSoftMinVpe), \
__SMU_DUMMY_MAP(GetMetricsVersion), \
__SMU_DUMMY_MAP(EnableUCLKShadow), \
- __SMU_DUMMY_MAP(RmaDueToBadPageThreshold),
+ __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \
+ __SMU_DUMMY_MAP(MALLPowerController), \
+ __SMU_DUMMY_MAP(MALLPowerState),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index e4419e1561ef..18abfbd6d059 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -52,6 +52,19 @@
#define mmMP1_SMN_C2PMSG_90 0x029a
#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0
+/* MALLPowerController message arguments (Defines for the Cache mode control) */
+#define SMU_MALL_PMFW_CONTROL 0
+#define SMU_MALL_DRIVER_CONTROL 1
+
+/*
+ * MALLPowerState message arguments
+ * (Defines for the Allocate/Release Cache mode if in driver mode)
+ */
+#define SMU_MALL_EXIT_PG 0
+#define SMU_MALL_ENTER_PG 1
+
+#define SMU_MALL_PG_CONFIG_DEFAULT SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON
+
#define FEATURE_MASK(feature) (1ULL << feature)
#define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
@@ -66,6 +79,12 @@
FEATURE_MASK(FEATURE_GFX_DPM_BIT) | \
FEATURE_MASK(FEATURE_VPE_DPM_BIT))
+enum smu_mall_pg_config {
+ SMU_MALL_PG_CONFIG_PMFW_CONTROL = 0,
+ SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON = 1,
+ SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF = 2,
+};
+
static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
@@ -113,6 +132,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(PowerDownUmsch, PPSMC_MSG_PowerDownUmsch, 1),
MSG_MAP(SetSoftMaxVpe, PPSMC_MSG_SetSoftMaxVpe, 1),
MSG_MAP(SetSoftMinVpe, PPSMC_MSG_SetSoftMinVpe, 1),
+ MSG_MAP(MALLPowerController, PPSMC_MSG_MALLPowerController, 1),
+ MSG_MAP(MALLPowerState, PPSMC_MSG_MALLPowerState, 1),
};
static struct cmn2asic_mapping smu_v14_0_0_feature_mask_map[SMU_FEATURE_COUNT] = {
@@ -1423,6 +1444,57 @@ static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_cl
return 0;
}
+static int smu_v14_0_1_init_mall_power_gating(struct smu_context *smu, enum smu_mall_pg_config pg_config)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if (pg_config == SMU_MALL_PG_CONFIG_PMFW_CONTROL) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController,
+ SMU_MALL_PMFW_CONTROL, NULL);
+ if (ret) {
+ dev_err(adev->dev, "Init MALL PMFW CONTROL Failure\n");
+ return ret;
+ }
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController,
+ SMU_MALL_DRIVER_CONTROL, NULL);
+ if (ret) {
+ dev_err(adev->dev, "Init MALL Driver CONTROL Failure\n");
+ return ret;
+ }
+
+ if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState,
+ SMU_MALL_EXIT_PG, NULL);
+ if (ret) {
+ dev_err(adev->dev, "EXIT MALL PG Failure\n");
+ return ret;
+ }
+ } else if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState,
+ SMU_MALL_ENTER_PG, NULL);
+ if (ret) {
+ dev_err(adev->dev, "Enter MALL PG Failure\n");
+ return ret;
+ }
+ }
+ }
+
+ return ret;
+}
+
+static int smu_v14_0_common_set_mall_enable(struct smu_context *smu)
+{
+ enum smu_mall_pg_config pg_config = SMU_MALL_PG_CONFIG_DEFAULT;
+ int ret = 0;
+
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ ret = smu_v14_0_1_init_mall_power_gating(smu, pg_config);
+
+ return ret;
+}
+
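
Editor's note: smu_v14_0_1_init_mall_power_gating() is a two-step handshake: MALLPowerController first selects who owns the cache (PMFW or driver), and only in driver mode does MALLPowerState then pick the gating state. A standalone sketch of that sequencing for the default always-on config; send_msg() is a stub standing in for smu_cmn_send_smc_msg_with_param():

#include <stdio.h>

enum { MALL_PMFW_CONTROL, MALL_DRIVER_CONTROL };	/* controller arg */
enum { MALL_EXIT_PG, MALL_ENTER_PG };			/* state arg */

static int send_msg(const char *msg, int arg)
{
	printf("SMU <- %s(%d)\n", msg, arg);
	return 0;
}

int main(void)
{
	/* Step 1: hand MALL control to the driver. */
	int ret = send_msg("MALLPowerController", MALL_DRIVER_CONTROL);

	/* Step 2: "always on" means keep the cache out of power gating. */
	if (!ret)
		ret = send_msg("MALLPowerState", MALL_EXIT_PG);

	return ret;
}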
static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.check_fw_status = smu_v14_0_check_fw_status,
.check_fw_version = smu_v14_0_check_fw_version,
@@ -1454,6 +1526,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
.get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
+ .set_mall_enable = smu_v14_0_common_set_mall_enable,
};
static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h
index ea271f62b214..ec0b7f3d889c 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511.h
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h
@@ -401,7 +401,7 @@ struct adv7511 {
#ifdef CONFIG_DRM_I2C_ADV7511_CEC
int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511);
-void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1);
+int adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1);
#else
static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511)
{
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c
index 44451a9658a3..2e9c88a2b5ed 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c
@@ -119,7 +119,7 @@ static void adv7511_cec_rx(struct adv7511 *adv7511, int rx_buf)
cec_received_msg(adv7511->cec_adap, &msg);
}
-void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1)
+int adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1)
{
unsigned int offset = adv7511->info->reg_cec_offset;
const u32 irq_tx_mask = ADV7511_INT1_CEC_TX_READY |
@@ -131,16 +131,19 @@ void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1)
unsigned int rx_status;
int rx_order[3] = { -1, -1, -1 };
int i;
+ int irq_status = IRQ_NONE;
- if (irq1 & irq_tx_mask)
+ if (irq1 & irq_tx_mask) {
adv_cec_tx_raw_status(adv7511, irq1);
+ irq_status = IRQ_HANDLED;
+ }
if (!(irq1 & irq_rx_mask))
- return;
+ return irq_status;
if (regmap_read(adv7511->regmap_cec,
ADV7511_REG_CEC_RX_STATUS + offset, &rx_status))
- return;
+ return irq_status;
/*
* ADV7511_REG_CEC_RX_STATUS[5:0] contains the reception order of RX
@@ -172,6 +175,8 @@ void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1)
adv7511_cec_rx(adv7511, rx_buf);
}
+
+ return IRQ_HANDLED;
}
static int adv7511_cec_adap_enable(struct cec_adapter *adap, bool enable)
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index 66ccb61e2a66..c8d2c4a157b2 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -469,6 +469,8 @@ static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd)
{
unsigned int irq0, irq1;
int ret;
+ int cec_status = IRQ_NONE;
+ int irq_status = IRQ_NONE;
ret = regmap_read(adv7511->regmap, ADV7511_REG_INT(0), &irq0);
if (ret < 0)
@@ -478,29 +480,31 @@ static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd)
if (ret < 0)
return ret;
- /* If there is no IRQ to handle, exit indicating no IRQ data */
- if (!(irq0 & (ADV7511_INT0_HPD | ADV7511_INT0_EDID_READY)) &&
- !(irq1 & ADV7511_INT1_DDC_ERROR))
- return -ENODATA;
-
regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
- if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder)
+ if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder) {
schedule_work(&adv7511->hpd_work);
+ irq_status = IRQ_HANDLED;
+ }
if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
adv7511->edid_read = true;
if (adv7511->i2c_main->irq)
wake_up_all(&adv7511->wq);
+ irq_status = IRQ_HANDLED;
}
#ifdef CONFIG_DRM_I2C_ADV7511_CEC
- adv7511_cec_irq_process(adv7511, irq1);
+ cec_status = adv7511_cec_irq_process(adv7511, irq1);
#endif
- return 0;
+ /* Report IRQ_HANDLED if anything was serviced, otherwise no IRQ data */
+ if (irq_status == IRQ_HANDLED || cec_status == IRQ_HANDLED)
+ return IRQ_HANDLED;
+
+ return IRQ_NONE;
}
static irqreturn_t adv7511_irq_handler(int irq, void *devid)
@@ -509,7 +513,7 @@ static irqreturn_t adv7511_irq_handler(int irq, void *devid)
int ret;
ret = adv7511_irq_process(adv7511, true);
- return ret < 0 ? IRQ_NONE : IRQ_HANDLED;
+ return ret < 0 ? IRQ_NONE : ret;
}
/* -----------------------------------------------------------------------------
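
Editor's note: the adv7511 change reworks plumbing rather than logic: each stage now reports whether it actually serviced an interrupt, and the hard handler forwards that verdict so the IRQ core can spot spurious interrupts on a shared line. A compact user-space sketch of the propagation; the two-source process() is hypothetical, and IRQ_NONE/IRQ_HANDLED mirror the kernel's irqreturn values:

#include <stdio.h>

enum irqreturn { IRQ_NONE, IRQ_HANDLED };

static enum irqreturn process(int tx_pending, int rx_pending)
{
	enum irqreturn status = IRQ_NONE;

	if (tx_pending)
		status = IRQ_HANDLED;	/* serviced the TX side */
	if (rx_pending)
		status = IRQ_HANDLED;	/* serviced the RX side */

	return status;
}

int main(void)
{
	printf("%s\n", process(0, 0) == IRQ_NONE ? "spurious" : "handled");
	return 0;
}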
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d612133e2cf7..117237d3528b 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -524,6 +524,9 @@ struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper)
if (!info)
return ERR_PTR(-ENOMEM);
+ if (!drm_leak_fbdev_smem)
+ info->flags |= FBINFO_HIDE_SMEM_START;
+
ret = fb_alloc_cmap(&info->cmap, 256, 0);
if (ret)
goto err_release;
@@ -1860,9 +1863,6 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper)
info = fb_helper->info;
info->var.pixclock = 0;
- if (!drm_leak_fbdev_smem)
- info->flags |= FBINFO_HIDE_SMEM_START;
-
/* Need to drop locks to avoid recursive deadlock in
* register_framebuffer. This is ok because the only thing left to do is
* register the fbdev emulation instance in kernel_fb_helper_list. */
diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c
index 6c9427bb4053..13cd754af311 100644
--- a/drivers/gpu/drm/drm_fbdev_dma.c
+++ b/drivers/gpu/drm/drm_fbdev_dma.c
@@ -130,7 +130,10 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper,
info->flags |= FBINFO_READS_FAST; /* signal caching */
info->screen_size = sizes->surface_height * fb->pitches[0];
info->screen_buffer = map.vaddr;
- info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer));
+ if (!(info->flags & FBINFO_HIDE_SMEM_START)) {
+ if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer)))
+ info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer));
+ }
info->fix.smem_len = info->screen_size;
return 0;
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c
index 97e579c33d84..1e200d815e1a 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -84,7 +84,8 @@ static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper,
sizes->surface_width, sizes->surface_height,
sizes->surface_bpp);
- format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth);
+ format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp,
+ sizes->surface_depth);
buffer = drm_client_framebuffer_create(client, sizes->surface_width,
sizes->surface_height, format);
if (IS_ERR(buffer))
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 638ffa4444f5..714e42b05108 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -469,14 +469,12 @@ void drm_file_update_pid(struct drm_file *filp)
dev = filp->minor->dev;
mutex_lock(&dev->filelist_mutex);
+ get_pid(pid);
old = rcu_replace_pointer(filp->pid, pid, 1);
mutex_unlock(&dev->filelist_mutex);
- if (pid != old) {
- get_pid(pid);
- synchronize_rcu();
- put_pid(old);
- }
+ synchronize_rcu();
+ put_pid(old);
}
/**
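
Editor's note: the drm_file_update_pid() fix is about ordering, not logic: the reference for the new pid must be taken before the pointer is published, and the grace period plus put must run unconditionally, since readers may still hold the old pointer, and when old == new the extra get/put pair simply cancels out. A kernel-style sketch of the resulting shape (not standalone; it condenses the hunk above):

static void update_pid(struct drm_file *filp, struct pid *pid)
{
	struct pid *old;

	get_pid(pid);			/* ref before readers can see it */
	old = rcu_replace_pointer(filp->pid, pid, 1);
	synchronize_rcu();		/* no reader still dereferences old */
	put_pid(old);			/* drop the ref old carried */
}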
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 2166208a961d..3860a8ce1e2d 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -420,13 +420,20 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"),
},
.driver_data = (void *)&lcd1280x1920_rightside_up,
- }, { /* Valve Steam Deck */
+ }, { /* Valve Steam Deck (Jupiter) */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"),
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
},
.driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /* Valve Steam Deck (Galileo) */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
}, { /* VIOS LTH17 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"),
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index f08a6803dc18..3adc2c9ab72d 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -311,6 +311,9 @@ static int cdv_intel_lvds_get_modes(struct drm_connector *connector)
if (mode_dev->panel_fixed_mode != NULL) {
struct drm_display_mode *mode =
drm_mode_duplicate(dev, mode_dev->panel_fixed_mode);
+ if (!mode)
+ return 0;
+
drm_mode_probed_add(connector, mode);
return 1;
}
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 8486de230ec9..8d1be94a443b 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -504,6 +504,9 @@ static int psb_intel_lvds_get_modes(struct drm_connector *connector)
if (mode_dev->panel_fixed_mode != NULL) {
struct drm_display_mode *mode =
drm_mode_duplicate(dev, mode_dev->panel_fixed_mode);
+ if (!mode)
+ return 0;
+
drm_mode_probed_add(connector, mode);
return 1;
}
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 3c3fc53376ce..6bff169fa8d4 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2088,6 +2088,9 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port,
u32 ln0, ln1, pin_assignment;
u8 width;
+ if (DISPLAY_VER(dev_priv) >= 14)
+ return;
+
if (!intel_encoder_is_tc(&dig_port->base) ||
intel_tc_port_in_tbt_alt_mode(dig_port))
return;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index e05e25cd4a94..5b3b6ae1e3d7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -442,6 +442,10 @@ bool intel_dp_has_bigjoiner(struct intel_dp *intel_dp)
struct intel_encoder *encoder = &intel_dig_port->base;
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ /* eDP MSO is not compatible with joiner */
+ if (intel_dp->mso_link_count)
+ return false;
+
return DISPLAY_VER(dev_priv) >= 12 ||
(DISPLAY_VER(dev_priv) == 11 &&
encoder->port != PORT_A);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 40371b8a9bbb..93bc1cc1ee7e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -298,6 +298,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
return;
GEM_BUG_ON(fence->vma != vma);
+ i915_active_wait(&fence->active);
GEM_BUG_ON(!i915_active_is_idle(&fence->active));
GEM_BUG_ON(atomic_read(&fence->pin_count));
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 17a5cca007e2..4bd0baa2a4f5 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -250,29 +250,20 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
if (ret)
goto free_drm;
ret = meson_canvas_alloc(priv->canvas, &priv->canvas_id_vd1_0);
- if (ret) {
- meson_canvas_free(priv->canvas, priv->canvas_id_osd1);
- goto free_drm;
- }
+ if (ret)
+ goto free_canvas_osd1;
ret = meson_canvas_alloc(priv->canvas, &priv->canvas_id_vd1_1);
- if (ret) {
- meson_canvas_free(priv->canvas, priv->canvas_id_osd1);
- meson_canvas_free(priv->canvas, priv->canvas_id_vd1_0);
- goto free_drm;
- }
+ if (ret)
+ goto free_canvas_vd1_0;
ret = meson_canvas_alloc(priv->canvas, &priv->canvas_id_vd1_2);
- if (ret) {
- meson_canvas_free(priv->canvas, priv->canvas_id_osd1);
- meson_canvas_free(priv->canvas, priv->canvas_id_vd1_0);
- meson_canvas_free(priv->canvas, priv->canvas_id_vd1_1);
- goto free_drm;
- }
+ if (ret)
+ goto free_canvas_vd1_1;
priv->vsync_irq = platform_get_irq(pdev, 0);
ret = drm_vblank_init(drm, 1);
if (ret)
- goto free_drm;
+ goto free_canvas_vd1_2;
/* Assign limits per soc revision/package */
for (i = 0 ; i < ARRAY_SIZE(meson_drm_soc_attrs) ; ++i) {
@@ -288,11 +279,11 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
*/
ret = drm_aperture_remove_framebuffers(&meson_driver);
if (ret)
- goto free_drm;
+ goto free_canvas_vd1_2;
ret = drmm_mode_config_init(drm);
if (ret)
- goto free_drm;
+ goto free_canvas_vd1_2;
drm->mode_config.max_width = 3840;
drm->mode_config.max_height = 2160;
drm->mode_config.funcs = &meson_mode_config_funcs;
@@ -307,7 +298,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
if (priv->afbcd.ops) {
ret = priv->afbcd.ops->init(priv);
if (ret)
- goto free_drm;
+ goto free_canvas_vd1_2;
}
/* Encoder Initialization */
@@ -371,6 +362,14 @@ uninstall_irq:
exit_afbcd:
if (priv->afbcd.ops)
priv->afbcd.ops->exit(priv);
+free_canvas_vd1_2:
+ meson_canvas_free(priv->canvas, priv->canvas_id_vd1_2);
+free_canvas_vd1_1:
+ meson_canvas_free(priv->canvas, priv->canvas_id_vd1_1);
+free_canvas_vd1_0:
+ meson_canvas_free(priv->canvas, priv->canvas_id_vd1_0);
+free_canvas_osd1:
+ meson_canvas_free(priv->canvas, priv->canvas_id_osd1);
free_drm:
drm_dev_put(drm);
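
Editor's note: the meson change replaces repeated inline cleanup with a single goto ladder: one label per canvas, in reverse allocation order, so each error path unwinds exactly what was set up before it. A self-contained illustration of the shape, with heap buffers standing in for the canvas IDs:

#include <stdlib.h>

static int bind_example(void)
{
	void *osd1, *vd1_0, *vd1_1;
	int ret;

	osd1 = malloc(32);
	if (!osd1)
		return -1;

	vd1_0 = malloc(32);
	if (!vd1_0) {
		ret = -1;
		goto free_osd1;
	}

	vd1_1 = malloc(32);
	if (!vd1_1) {
		ret = -1;
		goto free_vd1_0;
	}

	ret = 0;	/* success: resources used here, then released */

	free(vd1_1);
free_vd1_0:
	free(vd1_0);
free_osd1:
	free(osd1);
	return ret;
}

int main(void)
{
	return bind_example() ? 1 : 0;
}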
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index 670c9739e5e1..2033214c4b78 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -209,6 +209,8 @@ static int nv17_tv_get_ld_modes(struct drm_encoder *encoder,
struct drm_display_mode *mode;
mode = drm_mode_duplicate(encoder->dev, tv_mode);
+ if (!mode)
+ continue;
mode->clock = tv_norm->tv_enc_mode.vrefresh *
mode->htotal / 1000 *
@@ -258,6 +260,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder,
if (modes[i].hdisplay == output_mode->hdisplay &&
modes[i].vdisplay == output_mode->vdisplay) {
mode = drm_mode_duplicate(encoder->dev, output_mode);
+ if (!mode)
+ continue;
mode->type |= DRM_MODE_TYPE_PREFERRED;
} else {
@@ -265,6 +269,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder,
modes[i].vdisplay, 60, false,
(output_mode->flags &
DRM_MODE_FLAG_INTERLACE), false);
+ if (!mode)
+ continue;
}
/* CVT modes are sometimes unsuitable... */
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 856b3ef5edb8..0c71d761d378 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1001,6 +1001,9 @@ nouveau_connector_get_modes(struct drm_connector *connector)
struct drm_display_mode *mode;
mode = drm_mode_duplicate(dev, nv_connector->native_mode);
+ if (!mode)
+ return 0;
+
drm_mode_probed_add(connector, mode);
ret = 1;
}
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index dcb6d0b6ced0..c8cdc8356c58 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2752,6 +2752,7 @@ static const struct display_timing koe_tx26d202vm0bwa_timing = {
.vfront_porch = { 3, 5, 10 },
.vback_porch = { 2, 5, 10 },
.vsync_len = { 5, 5, 5 },
+ .flags = DISPLAY_FLAGS_DE_HIGH,
};
static const struct panel_desc koe_tx26d202vm0bwa = {
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index b8a84f26b3ef..b5e7b919f241 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -86,15 +86,15 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
int ret = 0;
void *out_alloc;
+ if (!in->count)
+ return NULL;
+
/* User stride must be at least the minimum object size; otherwise it
* might lack useful information.
*/
if (in->stride < min_stride)
return ERR_PTR(-EINVAL);
- if (!in->count)
- return NULL;
-
out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL);
if (!out_alloc)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 79ffcbc41d78..9a0ff48f7061 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -459,6 +459,16 @@ struct panthor_queue {
atomic64_t seqno;
/**
+ * @last_fence: Fence of the last submitted job.
+ *
+ * We return this fence when we get an empty command stream.
+ * This way, we are guaranteed that all earlier jobs have completed
+ * when drm_sched_job::s_fence::finished is signaled, without having
+ * to feed the CS ring buffer with a dummy job that only signals the
+ * fence.
+ */
+ struct dma_fence *last_fence;
+
+ /**
* @in_flight_jobs: List containing all in-flight jobs.
*
* Used to keep track and signal panthor_job::done_fence when the
@@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
panthor_kernel_bo_destroy(queue->ringbuf);
panthor_kernel_bo_destroy(queue->iface.mem);
+ /* Release the last_fence we were holding, if any. */
+ dma_fence_put(queue->fence_ctx.last_fence);
+
kfree(queue);
}
@@ -2784,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work)
spin_lock(&queue->fence_ctx.lock);
list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
- if (!job->call_info.size)
- continue;
-
if (syncobj->seqno < job->done_fence->seqno)
break;
@@ -2865,11 +2875,14 @@ queue_run_job(struct drm_sched_job *sched_job)
static_assert(sizeof(call_instrs) % 64 == 0,
"call_instrs is not aligned on a cacheline");
- /* Stream size is zero, nothing to do => return a NULL fence and let
- * drm_sched signal the parent.
+ /* Stream size is zero, nothing to do except making sure all previously
+ * submitted jobs are done before we signal the
+ * drm_sched_job::s_fence::finished fence.
*/
- if (!job->call_info.size)
- return NULL;
+ if (!job->call_info.size) {
+ job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
+ return dma_fence_get(job->done_fence);
+ }
ret = pm_runtime_resume_and_get(ptdev->base.dev);
if (drm_WARN_ON(&ptdev->base, ret))
@@ -2928,6 +2941,10 @@ queue_run_job(struct drm_sched_job *sched_job)
}
}
+ /* Update the last fence. */
+ dma_fence_put(queue->fence_ctx.last_fence);
+ queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
+
done_fence = dma_fence_get(job->done_fence);
out_unlock:
@@ -3378,10 +3395,15 @@ panthor_job_create(struct panthor_file *pfile,
goto err_put_job;
}
- job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
- if (!job->done_fence) {
- ret = -ENOMEM;
- goto err_put_job;
+ /* Empty command streams don't need a fence; they'll pick up the one
+ * from the previously submitted job.
+ */
+ if (job->call_info.size) {
+ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+ if (!job->done_fence) {
+ ret = -ENOMEM;
+ goto err_put_job;
+ }
}
ret = drm_sched_job_init(&job->base,
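
Editor's note: the panthor change gives empty command streams real completion semantics: instead of returning a NULL fence, the queue hands back a reference to the fence of the last real job, so s_fence::finished still implies everything earlier is done. A kernel-style sketch of the two halves (not standalone; struct queue and struct job are simplified stand-ins for the panthor types):

static struct dma_fence *run_job(struct queue *q, struct job *job)
{
	if (!job->size) {
		/* Borrow the previous job's fence; dma_fence_get(NULL) is OK. */
		job->done_fence = dma_fence_get(q->last_fence);
		return dma_fence_get(job->done_fence);
	}

	/* ... emit the stream ... then remember its fence for later. */
	dma_fence_put(q->last_fence);
	q->last_fence = dma_fence_get(job->done_fence);

	return dma_fence_get(job->done_fence);
}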
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 2ef201a072f1..e66a230331ee 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -642,7 +642,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
if (r)
goto error_unlock;
- if (bo_va->it.start)
+ if (bo_va->it.start && bo_va->bo)
r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource);
error_unlock:
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index 21d27e6235f3..b11f7c5bbcbe 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -1619,6 +1619,8 @@ void sumo_construct_vid_mapping_table(struct radeon_device *rdev,
for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) {
if (table[i].ulSupportedSCLK != 0) {
+ if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES)
+ continue;
vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit =
table[i].usVoltageID;
vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit =
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..2427be8bc97f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -346,6 +346,7 @@ static void ttm_bo_release(struct kref *kref)
if (!dma_resv_test_signaled(bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP) ||
(want_init_on_free() && (bo->ttm != NULL)) ||
+ bo->type == ttm_bo_type_sg ||
!dma_resv_trylock(bo->base.resv)) {
/* The BO is not idle, resurrect it for delayed destroy */
ttm_bo_flush_all_fences(bo);
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index faddae3d6ac2..6f1ac940cbae 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -2,7 +2,7 @@
config DRM_VMWGFX
tristate "DRM driver for VMware Virtual GPU"
depends on DRM && PCI && MMU
- depends on X86 || ARM64
+ depends on (X86 && HYPERVISOR_GUEST) || ARM64
select DRM_TTM
select DRM_TTM_HELPER
select MAPPING_DIRTY_HELPERS
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index d46f87a039f2..b3d3c065dd9d 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -159,12 +159,16 @@ void intel_hdcp_gsc_fini(struct xe_device *xe)
{
struct intel_hdcp_gsc_message *hdcp_message =
xe->display.hdcp.hdcp_message;
+ struct i915_hdcp_arbiter *arb = xe->display.hdcp.arbiter;
- if (!hdcp_message)
- return;
+ if (hdcp_message) {
+ xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo);
+ kfree(hdcp_message);
+ xe->display.hdcp.hdcp_message = NULL;
+ }
- xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo);
- kfree(hdcp_message);
+ kfree(arb);
+ xe->display.hdcp.arbiter = NULL;
}
static int xe_gsc_send_sync(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index bc1f794e3e61..b6f3a43d637f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -317,7 +317,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
- enum ttm_caching caching;
+ enum ttm_caching caching = ttm_cached;
int err;
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -331,26 +331,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
PAGE_SIZE);
- switch (bo->cpu_caching) {
- case DRM_XE_GEM_CPU_CACHING_WC:
- caching = ttm_write_combined;
- break;
- default:
- caching = ttm_cached;
- break;
- }
-
- WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
- * Display scanout is always non-coherent with the CPU cache.
- *
- * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
- * require a CPU:WC mapping.
+ * DGFX system memory is always WB / ttm_cached, since
+ * other caching modes are only supported on x86. DGFX
+ * GPU system memory accesses are always coherent with the
+ * CPU.
*/
- if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
- (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
- caching = ttm_write_combined;
+ if (!IS_DGFX(xe)) {
+ switch (bo->cpu_caching) {
+ case DRM_XE_GEM_CPU_CACHING_WC:
+ caching = ttm_write_combined;
+ break;
+ default:
+ caching = ttm_cached;
+ break;
+ }
+
+ WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+ /*
+ * Display scanout is always non-coherent with the CPU cache.
+ *
+ * For Xe_LPG and beyond, PPGTT PTE lookups are also
+ * non-coherent and require a CPU:WC mapping.
+ */
+ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+ (xe->info.graphics_verx100 >= 1270 &&
+ bo->flags & XE_BO_FLAG_PAGETABLE))
+ caching = ttm_write_combined;
+ }
err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
if (err) {
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 86422e113d39..10450f1fbbde 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -66,7 +66,8 @@ struct xe_bo {
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
- * objects.
+ * objects. Exceptions are system memory on DGFX, which is always
+ * WB.
*/
u16 cpu_caching;
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 577bd7043740..0443e07880a0 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -342,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
else
gt->steering[OADDRM].group_target = 1;
- gt->steering[DSS].instance_target = 0; /* unused */
+ gt->steering[OADDRM].instance_target = 0; /* unused */
}
static void init_steering_sqidi_psmi(struct xe_gt *gt)
@@ -357,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt)
static void init_steering_inst0(struct xe_gt *gt)
{
- gt->steering[DSS].group_target = 0; /* unused */
- gt->steering[DSS].instance_target = 0; /* unused */
+ gt->steering[INSTANCE0].group_target = 0; /* unused */
+ gt->steering[INSTANCE0].instance_target = 0; /* unused */
}
static const struct {
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 240e7a4bbff1..5faca4fc2fef 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -631,8 +631,6 @@ int xe_guc_enable_communication(struct xe_guc *guc)
struct xe_device *xe = guc_to_xe(guc);
int err;
- guc_enable_irq(guc);
-
if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) {
struct xe_gt *gt = guc_to_gt(guc);
struct xe_tile *tile = gt_to_tile(gt);
@@ -640,6 +638,8 @@ int xe_guc_enable_communication(struct xe_guc *guc)
err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc);
if (err)
return err;
+ } else {
+ guc_enable_irq(guc);
}
xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK,
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 65e5a3f4c340..198f5c2189cb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1334,7 +1334,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
GFP_KERNEL, true, 0);
if (IS_ERR(sa_bo)) {
err = PTR_ERR(sa_bo);
- goto err;
+ goto err_bb;
}
ppgtt_ofs = NUM_KERNEL_PDE +
@@ -1385,7 +1385,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
update_idx);
if (IS_ERR(job)) {
err = PTR_ERR(job);
- goto err_bb;
+ goto err_sa;
}
/* Wait on BO move */
@@ -1434,12 +1434,12 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
err_job:
xe_sched_job_put(job);
+err_sa:
+ drm_suballoc_free(sa_bo, NULL);
err_bb:
if (!q)
mutex_unlock(&m->job_mutex);
xe_bb_free(bb, NULL);
-err:
- drm_suballoc_free(sa_bo, NULL);
return ERR_PTR(err);
}
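
The xe_migrate_update_pgtables() fix above is the classic reverse-order unwind: each goto must target the label that frees exactly what has been acquired so far, so the suballocation gained its own err_sa label between the job and BB cleanups. A minimal sketch of the idiom, with hypothetical acquire_*/release_* helpers standing in for the Xe calls:

/* Sketch only: acquire_a/b and release_a/b are hypothetical stand-ins. */
int do_work(void)
{
	struct res *a, *b;
	int err;

	a = acquire_a();
	if (IS_ERR(a))
		return PTR_ERR(a);

	b = acquire_b();			/* like the sa_bo suballocation */
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto err_a;			/* only 'a' exists at this point */
	}

	if (start_job(a, b)) {			/* like the sched job creation */
		err = -EIO;
		goto err_b;			/* both 'a' and 'b' exist */
	}

	return 0;

err_b:
	release_b(b);				/* reverse order of acquisition */
err_a:
	release_a(a);
	return err;
}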
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a8ad728354cb..e0d676c74f14 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -45,8 +45,8 @@ int hv_init(void)
* This involves a hypercall.
*/
int hv_post_message(union hv_connection_id connection_id,
- enum hv_message_type message_type,
- void *payload, size_t payload_size)
+ enum hv_message_type message_type,
+ void *payload, size_t payload_size)
{
struct hv_input_post_message *aligned_msg;
unsigned long flags;
@@ -86,7 +86,7 @@ int hv_post_message(union hv_connection_id connection_id,
status = HV_STATUS_INVALID_PARAMETER;
} else {
status = hv_do_hypercall(HVCALL_POST_MESSAGE,
- aligned_msg, NULL);
+ aligned_msg, NULL);
}
local_irq_restore(flags);
@@ -111,7 +111,7 @@ int hv_synic_alloc(void)
hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
GFP_KERNEL);
- if (hv_context.hv_numa_map == NULL) {
+ if (!hv_context.hv_numa_map) {
pr_err("Unable to allocate NUMA map\n");
goto err;
}
@@ -120,11 +120,11 @@ int hv_synic_alloc(void)
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
tasklet_init(&hv_cpu->msg_dpc,
- vmbus_on_msg_dpc, (unsigned long) hv_cpu);
+ vmbus_on_msg_dpc, (unsigned long)hv_cpu);
if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
- if (hv_cpu->post_msg_page == NULL) {
+ if (!hv_cpu->post_msg_page) {
pr_err("Unable to allocate post msg page\n");
goto err;
}
@@ -147,14 +147,14 @@ int hv_synic_alloc(void)
if (!ms_hyperv.paravisor_present && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
- if (hv_cpu->synic_message_page == NULL) {
+ if (!hv_cpu->synic_message_page) {
pr_err("Unable to allocate SYNIC message page\n");
goto err;
}
hv_cpu->synic_event_page =
(void *)get_zeroed_page(GFP_ATOMIC);
- if (hv_cpu->synic_event_page == NULL) {
+ if (!hv_cpu->synic_event_page) {
pr_err("Unable to allocate SYNIC event page\n");
free_page((unsigned long)hv_cpu->synic_message_page);
@@ -203,14 +203,13 @@ err:
return ret;
}
-
void hv_synic_free(void)
{
int cpu, ret;
for_each_present_cpu(cpu) {
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
+ struct hv_per_cpu_context *hv_cpu =
+ per_cpu_ptr(hv_context.cpu_context, cpu);
/* It's better to leak the page if the encryption fails. */
if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
@@ -262,8 +261,8 @@ void hv_synic_free(void)
*/
void hv_synic_enable_regs(unsigned int cpu)
{
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
+ struct hv_per_cpu_context *hv_cpu =
+ per_cpu_ptr(hv_context.cpu_context, cpu);
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_sint shared_sint;
@@ -277,8 +276,8 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
- hv_cpu->synic_message_page
- = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
+ hv_cpu->synic_message_page =
+ (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_message_page)
pr_err("Fail to map synic message page.\n");
} else {
@@ -296,8 +295,8 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
- hv_cpu->synic_event_page
- = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
+ hv_cpu->synic_event_page =
+ (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_event_page)
pr_err("Fail to map synic event page.\n");
} else {
@@ -348,8 +347,8 @@ int hv_synic_init(unsigned int cpu)
*/
void hv_synic_disable_regs(unsigned int cpu)
{
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
+ struct hv_per_cpu_context *hv_cpu =
+ per_cpu_ptr(hv_context.cpu_context, cpu);
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index e000fa3b9f97..0e7427c2baf5 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -25,6 +25,7 @@
#include <linux/notifier.h>
#include <linux/percpu_counter.h>
#include <linux/page_reporting.h>
+#include <linux/sizes.h>
#include <linux/hyperv.h>
#include <asm/hyperv-tlfs.h>
@@ -41,8 +42,6 @@
* Begin protocol definitions.
*/
-
-
/*
* Protocol versions. The low word is the minor version, the high word the major
* version.
@@ -71,8 +70,6 @@ enum {
DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10
};
-
-
/*
* Message Types
*/
@@ -101,7 +98,6 @@ enum dm_message_type {
DM_VERSION_1_MAX = 12
};
-
/*
* Structures defining the dynamic memory management
* protocol.
@@ -115,7 +111,6 @@ union dm_version {
__u32 version;
} __packed;
-
union dm_caps {
struct {
__u64 balloon:1;
@@ -148,8 +143,6 @@ union dm_mem_page_range {
__u64 page_range;
} __packed;
-
-
/*
* The header for all dynamic memory messages:
*
@@ -174,7 +167,6 @@ struct dm_message {
__u8 data[]; /* enclosed message */
} __packed;
-
/*
* Specific message types supporting the dynamic memory protocol.
*/
@@ -271,7 +263,6 @@ struct dm_status {
__u32 io_diff;
} __packed;
-
/*
* Message to ask the guest to allocate memory - balloon up message.
* This message is sent from the host to the guest. The guest may not be
@@ -286,14 +277,13 @@ struct dm_balloon {
__u32 reservedz;
} __packed;
-
/*
* Balloon response message; this message is sent from the guest
* to the host in response to the balloon message.
*
* reservedz: Reserved; must be set to zero.
* more_pages: If FALSE, this is the last message of the transaction.
- * if TRUE there will atleast one more message from the guest.
+ * if TRUE there will be at least one more message from the guest.
*
* range_count: The number of ranges in the range array.
*
@@ -314,7 +304,7 @@ struct dm_balloon_response {
* to the guest to give guest more memory.
*
* more_pages: If FALSE, this is the last message of the transaction.
- * if TRUE there will atleast one more message from the guest.
+ * if TRUE there will be at least one more message from the guest.
*
* reservedz: Reserved; must be set to zero.
*
@@ -342,7 +332,6 @@ struct dm_unballoon_response {
struct dm_header hdr;
} __packed;
-
/*
* Hot add request message. Message sent from the host to the guest.
*
@@ -390,7 +379,6 @@ enum dm_info_type {
MAX_INFO_TYPE
};
-
/*
* Header for the information message.
*/
@@ -425,11 +413,11 @@ struct dm_info_msg {
* The range start_pfn : end_pfn specifies the range
* that the host has asked us to hot add. The range
* start_pfn : ha_end_pfn specifies the range that we have
- * currently hot added. We hot add in multiples of 128M
- * chunks; it is possible that we may not be able to bring
- * online all the pages in the region. The range
+ * currently hot added. We hot add in chunks equal to the
+ * memory block size; it is possible that we may not be able
+ * to bring online all the pages in the region. The range
* covered_start_pfn:covered_end_pfn defines the pages that can
- * be brough online.
+ * be brought online.
*/
struct hv_hotadd_state {
@@ -480,10 +468,10 @@ static unsigned long last_post_time;
static int hv_hypercall_multi_failure;
-module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
+module_param(hot_add, bool, 0644);
MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
-module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
+module_param(pressure_report_delay, uint, 0644);
MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
static atomic_t trans_id = ATOMIC_INIT(0);
@@ -502,11 +490,13 @@ enum hv_dm_state {
DM_INIT_ERROR
};
-
static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];
+
+static unsigned long ha_pages_in_chunk;
+#define HA_BYTES_IN_CHUNK (ha_pages_in_chunk << PAGE_SHIFT)
+
#define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE)
-#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE)
struct hv_dynmem_device {
struct hv_device *dev;
@@ -595,12 +585,12 @@ static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
struct hv_hotadd_gap *gap;
/* The page is not backed. */
- if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+ if (pfn < has->covered_start_pfn || pfn >= has->covered_end_pfn)
return false;
/* Check for gaps. */
list_for_each_entry(gap, &has->gap_list, list) {
- if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
+ if (pfn >= gap->start_pfn && pfn < gap->end_pfn)
return false;
}
@@ -724,28 +714,21 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
unsigned long processed_pfn;
unsigned long total_pfn = pfn_count;
- for (i = 0; i < (size/HA_CHUNK); i++) {
- start_pfn = start + (i * HA_CHUNK);
+ for (i = 0; i < (size/ha_pages_in_chunk); i++) {
+ start_pfn = start + (i * ha_pages_in_chunk);
scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
- has->ha_end_pfn += HA_CHUNK;
-
- if (total_pfn > HA_CHUNK) {
- processed_pfn = HA_CHUNK;
- total_pfn -= HA_CHUNK;
- } else {
- processed_pfn = total_pfn;
- total_pfn = 0;
- }
-
- has->covered_end_pfn += processed_pfn;
+ has->ha_end_pfn += ha_pages_in_chunk;
+ processed_pfn = umin(total_pfn, ha_pages_in_chunk);
+ total_pfn -= processed_pfn;
+ has->covered_end_pfn += processed_pfn;
}
reinit_completion(&dm_device.ol_waitevent);
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
- (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);
+ HA_BYTES_IN_CHUNK, MHP_MERGE_RESOURCE);
if (ret) {
pr_err("hot_add memory failed error is %d\n", ret);
@@ -760,7 +743,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
do_hot_add = false;
}
scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
- has->ha_end_pfn -= HA_CHUNK;
+ has->ha_end_pfn -= ha_pages_in_chunk;
has->covered_end_pfn -= processed_pfn;
}
break;
@@ -787,8 +770,8 @@ static void hv_online_page(struct page *pg, unsigned int order)
guard(spinlock_irqsave)(&dm_device.ha_lock);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
/* The page belongs to a different HAS. */
- if ((pfn < has->start_pfn) ||
- (pfn + (1UL << order) > has->end_pfn))
+ if (pfn < has->start_pfn ||
+ (pfn + (1UL << order) > has->end_pfn))
continue;
hv_bring_pgs_online(has, pfn, 1UL << order);
@@ -800,7 +783,7 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
{
struct hv_hotadd_state *has;
struct hv_hotadd_gap *gap;
- unsigned long residual, new_inc;
+ unsigned long residual;
int ret = 0;
guard(spinlock_irqsave)(&dm_device.ha_lock);
@@ -836,15 +819,9 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
* our current limit; extend it.
*/
if ((start_pfn + pfn_cnt) > has->end_pfn) {
+ /* Extend the region by multiples of ha_pages_in_chunk */
residual = (start_pfn + pfn_cnt - has->end_pfn);
- /*
- * Extend the region by multiples of HA_CHUNK.
- */
- new_inc = (residual / HA_CHUNK) * HA_CHUNK;
- if (residual % HA_CHUNK)
- new_inc += HA_CHUNK;
-
- has->end_pfn += new_inc;
+ has->end_pfn += ALIGN(residual, ha_pages_in_chunk);
}
ret = 1;
@@ -855,7 +832,7 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
}
static unsigned long handle_pg_range(unsigned long pg_start,
- unsigned long pg_count)
+ unsigned long pg_count)
{
unsigned long start_pfn = pg_start;
unsigned long pfn_cnt = pg_count;
@@ -866,7 +843,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
unsigned long res = 0, flags;
pr_debug("Hot adding %lu pages starting at pfn 0x%lx.\n", pg_count,
- pg_start);
+ pg_start);
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
@@ -902,22 +879,19 @@ static unsigned long handle_pg_range(unsigned long pg_start,
if (start_pfn > has->start_pfn &&
online_section_nr(pfn_to_section_nr(start_pfn)))
hv_bring_pgs_online(has, start_pfn, pgs_ol);
-
}
- if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
+ if (has->ha_end_pfn < has->end_pfn && pfn_cnt > 0) {
/*
* We have some residual hot add range
* that needs to be hot added; hot add
* it now. Hot add a multiple of
- * HA_CHUNK that fully covers the pages
+ * ha_pages_in_chunk that fully covers the pages
* we have.
*/
size = (has->end_pfn - has->ha_end_pfn);
if (pfn_cnt <= size) {
- size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
- if (pfn_cnt % HA_CHUNK)
- size += HA_CHUNK;
+ size = ALIGN(pfn_cnt, ha_pages_in_chunk);
} else {
pfn_cnt = size;
}
@@ -1010,10 +984,7 @@ static void hot_add_req(struct work_struct *dummy)
rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
- if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
- unsigned long region_size;
- unsigned long region_start;
-
+ if (rg_start == 0 && !dm->host_specified_ha_region) {
/*
* The host has not specified the hot-add region.
* Based on the hot-add page range being specified,
@@ -1021,19 +992,13 @@ static void hot_add_req(struct work_struct *dummy)
* that need to be hot-added while ensuring the alignment
* and size requirements of Linux as it relates to hot-add.
*/
- region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
- if (pfn_cnt % HA_CHUNK)
- region_size += HA_CHUNK;
-
- region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
-
- rg_start = region_start;
- rg_sz = region_size;
+ rg_start = ALIGN_DOWN(pg_start, ha_pages_in_chunk);
+ rg_sz = ALIGN(pfn_cnt, ha_pages_in_chunk);
}
if (do_hot_add)
resp.page_count = process_hot_add(pg_start, pfn_cnt,
- rg_start, rg_sz);
+ rg_start, rg_sz);
dm->num_pages_added += resp.page_count;
#endif
@@ -1211,11 +1176,10 @@ static void post_status(struct hv_dynmem_device *dm)
sizeof(struct dm_status),
(unsigned long)NULL,
VM_PKT_DATA_INBAND, 0);
-
}
static void free_balloon_pages(struct hv_dynmem_device *dm,
- union dm_mem_page_range *range_array)
+ union dm_mem_page_range *range_array)
{
int num_pages = range_array->finfo.page_cnt;
__u64 start_frame = range_array->finfo.start_page;
@@ -1231,8 +1195,6 @@ static void free_balloon_pages(struct hv_dynmem_device *dm,
}
}
-
-
static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
unsigned int num_pages,
struct dm_balloon_response *bl_resp,
@@ -1278,7 +1240,6 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
page_to_pfn(pg);
bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
bl_resp->hdr.size += sizeof(union dm_mem_page_range);
-
}
return i * alloc_unit;
@@ -1332,7 +1293,7 @@ static void balloon_up(struct work_struct *dummy)
if (num_ballooned == 0 || num_ballooned == num_pages) {
pr_debug("Ballooned %u out of %u requested pages.\n",
- num_pages, dm_device.balloon_wrk.num_pages);
+ num_pages, dm_device.balloon_wrk.num_pages);
bl_resp->more_pages = 0;
done = true;
@@ -1366,16 +1327,15 @@ static void balloon_up(struct work_struct *dummy)
for (i = 0; i < bl_resp->range_count; i++)
free_balloon_pages(&dm_device,
- &bl_resp->range_array[i]);
+ &bl_resp->range_array[i]);
done = true;
}
}
-
}
static void balloon_down(struct hv_dynmem_device *dm,
- struct dm_unballoon_request *req)
+ struct dm_unballoon_request *req)
{
union dm_mem_page_range *range_array = req->range_array;
int range_count = req->range_count;
@@ -1389,7 +1349,7 @@ static void balloon_down(struct hv_dynmem_device *dm,
}
pr_debug("Freed %u ballooned pages.\n",
- prev_pages_ballooned - dm->num_pages_ballooned);
+ prev_pages_ballooned - dm->num_pages_ballooned);
if (req->more_pages == 1)
return;
@@ -1414,8 +1374,7 @@ static int dm_thread_func(void *dm_dev)
struct hv_dynmem_device *dm = dm_dev;
while (!kthread_should_stop()) {
- wait_for_completion_interruptible_timeout(
- &dm_device.config_event, 1*HZ);
+ wait_for_completion_interruptible_timeout(&dm_device.config_event, 1 * HZ);
/*
* The host expects us to post information on the memory
* pressure every second.
@@ -1439,9 +1398,8 @@ static int dm_thread_func(void *dm_dev)
return 0;
}
-
static void version_resp(struct hv_dynmem_device *dm,
- struct dm_version_response *vresp)
+ struct dm_version_response *vresp)
{
struct dm_version_request version_req;
int ret;
@@ -1502,7 +1460,7 @@ version_error:
}
static void cap_resp(struct hv_dynmem_device *dm,
- struct dm_capabilities_resp_msg *cap_resp)
+ struct dm_capabilities_resp_msg *cap_resp)
{
if (!cap_resp->is_accepted) {
pr_err("Capabilities not accepted by host\n");
@@ -1535,7 +1493,7 @@ static void balloon_onchannelcallback(void *context)
switch (dm_hdr->type) {
case DM_VERSION_RESPONSE:
version_resp(dm,
- (struct dm_version_response *)dm_msg);
+ (struct dm_version_response *)dm_msg);
break;
case DM_CAPABILITIES_RESPONSE:
@@ -1565,7 +1523,7 @@ static void balloon_onchannelcallback(void *context)
dm->state = DM_BALLOON_DOWN;
balloon_down(dm,
- (struct dm_unballoon_request *)recv_buffer);
+ (struct dm_unballoon_request *)recv_buffer);
break;
case DM_MEM_HOT_ADD_REQUEST:
@@ -1603,17 +1561,15 @@ static void balloon_onchannelcallback(void *context)
default:
pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type);
-
}
}
-
}
#define HV_LARGE_REPORTING_ORDER 9
#define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \
HV_LARGE_REPORTING_ORDER)
static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
- struct scatterlist *sgl, unsigned int nents)
+ struct scatterlist *sgl, unsigned int nents)
{
unsigned long flags;
struct hv_memory_hint *hint;
@@ -1648,7 +1604,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
*/
/* page reporting for pages 2MB or higher */
- if (order >= HV_LARGE_REPORTING_ORDER ) {
+ if (order >= HV_LARGE_REPORTING_ORDER) {
range->page.largepage = 1;
range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
range->base_large_pfn = page_to_hvpfn(
@@ -1662,23 +1618,21 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
range->page.additional_pages =
(sg->length / HV_HYP_PAGE_SIZE) - 1;
}
-
}
status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0,
hint, NULL);
local_irq_restore(flags);
if (!hv_result_success(status)) {
-
pr_err("Cold memory discard hypercall failed with status %llx\n",
- status);
+ status);
if (hv_hypercall_multi_failure > 0)
hv_hypercall_multi_failure++;
if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) {
pr_err("Underlying Hyper-V does not support order less than 9. Hypercall failed\n");
pr_err("Defaulting to page_reporting_order %d\n",
- pageblock_order);
+ pageblock_order);
page_reporting_order = pageblock_order;
hv_hypercall_multi_failure++;
return -EINVAL;
@@ -1712,7 +1666,7 @@ static void enable_page_reporting(void)
pr_err("Failed to enable cold memory discard: %d\n", ret);
} else {
pr_info("Cold memory discard hint enabled with order %d\n",
- page_reporting_order);
+ page_reporting_order);
}
}
@@ -1795,7 +1749,7 @@ static int balloon_connect_vsp(struct hv_device *dev)
if (ret)
goto out;
- t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
+ t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto out;
@@ -1831,10 +1785,13 @@ static int balloon_connect_vsp(struct hv_device *dev)
cap_msg.caps.cap_bits.hot_add = hot_add_enabled();
/*
- * Specify our alignment requirements as it relates
- * memory hot-add. Specify 128MB alignment.
+ * Specify our alignment requirements for memory hot-add. The value is
+ * the log base 2 of the number of megabytes in a chunk. For example,
+ * with 256 MiB chunks, the value is 8. The number of MiB in a chunk
+ * must be a power of 2.
*/
- cap_msg.caps.cap_bits.hot_add_alignment = 7;
+ cap_msg.caps.cap_bits.hot_add_alignment =
+ ilog2(HA_BYTES_IN_CHUNK / SZ_1M);
/*
* Currently the host does not use these
@@ -1850,7 +1807,7 @@ static int balloon_connect_vsp(struct hv_device *dev)
if (ret)
goto out;
- t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
+ t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto out;
@@ -1891,8 +1848,8 @@ static int hv_balloon_debug_show(struct seq_file *f, void *offset)
char *sname;
seq_printf(f, "%-22s: %u.%u\n", "host_version",
- DYNMEM_MAJOR_VERSION(dm->version),
- DYNMEM_MINOR_VERSION(dm->version));
+ DYNMEM_MAJOR_VERSION(dm->version),
+ DYNMEM_MINOR_VERSION(dm->version));
seq_printf(f, "%-22s:", "capabilities");
if (ballooning_enabled())
@@ -1941,10 +1898,10 @@ static int hv_balloon_debug_show(struct seq_file *f, void *offset)
seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned);
seq_printf(f, "%-22s: %lu\n", "total_pages_committed",
- get_pages_committed(dm));
+ get_pages_committed(dm));
seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count",
- dm->max_dynamic_page_count);
+ dm->max_dynamic_page_count);
return 0;
}
@@ -1954,7 +1911,7 @@ DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug);
static void hv_balloon_debugfs_init(struct hv_dynmem_device *b)
{
debugfs_create_file("hv-balloon", 0444, NULL, b,
- &hv_balloon_debug_fops);
+ &hv_balloon_debug_fops);
}
static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b)
@@ -1984,8 +1941,23 @@ static int balloon_probe(struct hv_device *dev,
hot_add = false;
#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Hot-add must operate in chunks that are of size equal to the
+ * memory block size because that's what the core add_memory()
+ * interface requires. The Hyper-V interface requires that the memory
+ * block size be a power of 2, which is guaranteed by the check in
+ * memory_dev_init().
+ */
+ ha_pages_in_chunk = memory_block_size_bytes() / PAGE_SIZE;
do_hot_add = hot_add;
#else
+ /*
+ * Without MEMORY_HOTPLUG, the guest returns a failure status for all
+ * hot add requests from Hyper-V, and the chunk size is used only to
+ * specify alignment to Hyper-V as required by the host/guest protocol.
+ * Somewhat arbitrarily, use 128 MiB.
+ */
+ ha_pages_in_chunk = SZ_128M / PAGE_SIZE;
do_hot_add = false;
#endif
dm_device.dev = dev;
@@ -2097,7 +2069,6 @@ static int balloon_suspend(struct hv_device *hv_dev)
tasklet_enable(&hv_dev->channel->callback_event);
return 0;
-
}
static int balloon_resume(struct hv_device *dev)
@@ -2156,7 +2127,6 @@ static struct hv_driver balloon_drv = {
static int __init init_balloon_drv(void)
{
-
return vmbus_driver_register(&balloon_drv);
}
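
To make the hv_balloon arithmetic above concrete: ha_pages_in_chunk is now derived from the memory block size, residuals are rounded with ALIGN()/ALIGN_DOWN(), and the protocol's hot_add_alignment field encodes log2 of the chunk size in MiB. A standalone userspace sketch, where the 256 MiB block size is a hypothetical value and the helpers are open-coded equivalents of the kernel macros:

#include <stdio.h>

#define SZ_1M		(1UL << 20)
#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* Open-coded equivalents of the kernel's ALIGN()/ALIGN_DOWN()/ilog2(). */
#define ALIGN_UP(x, a)	 (((x) + (a) - 1) / (a) * (a))
#define ALIGN_DOWN(x, a) ((x) / (a) * (a))

static unsigned int ilog2_ul(unsigned long x)
{
	unsigned int r = 0;

	while (x >>= 1)
		r++;
	return r;
}

int main(void)
{
	/* e.g. memory_block_size_bytes() == 256 MiB on a hypothetical box */
	unsigned long block_bytes = 256 * SZ_1M;
	unsigned long ha_pages_in_chunk = block_bytes / PAGE_SIZE;
	unsigned long ha_bytes_in_chunk = ha_pages_in_chunk << PAGE_SHIFT;
	unsigned long residual = 70000;	/* leftover pfns to cover */

	/* Round residual pfns up to whole chunks, as pfn_covered() does */
	printf("extend by %lu pfns\n", ALIGN_UP(residual, ha_pages_in_chunk));

	/* Protocol field: log2 of the chunk size in MiB; 256 MiB -> 8 */
	printf("hot_add_alignment = %u\n", ilog2_ul(ha_bytes_in_chunk / SZ_1M));
	return 0;
}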
diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index 9388823bb0bb..44710267d669 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -135,7 +135,7 @@ config I2C_SLAVE_EEPROM
Documentation/i2c/slave-eeprom-backend.rst for further details.
config I2C_SLAVE_TESTUNIT
- tristate "I2C eeprom testunit driver"
+ tristate "I2C testunit driver"
help
This backend can be used to trigger test cases for I2C bus masters
which require a remote device with certain capabilities, e.g.
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 3d65934f5eb4..78d0561339e5 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -29,8 +29,7 @@ obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o
obj-$(CONFIG_I2C_SIS96X) += i2c-sis96x.o
obj-$(CONFIG_I2C_VIA) += i2c-via.o
obj-$(CONFIG_I2C_VIAPRO) += i2c-viapro.o
-i2c-zhaoxin-objs := i2c-viai2c-zhaoxin.o i2c-viai2c-common.o
-obj-$(CONFIG_I2C_ZHAOXIN) += i2c-zhaoxin.o
+obj-$(CONFIG_I2C_ZHAOXIN) += i2c-viai2c-zhaoxin.o i2c-viai2c-common.o
# Mac SMBus host controller drivers
obj-$(CONFIG_I2C_HYDRA) += i2c-hydra.o
@@ -120,8 +119,7 @@ obj-$(CONFIG_I2C_TEGRA_BPMP) += i2c-tegra-bpmp.o
obj-$(CONFIG_I2C_UNIPHIER) += i2c-uniphier.o
obj-$(CONFIG_I2C_UNIPHIER_F) += i2c-uniphier-f.o
obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o
-i2c-wmt-objs := i2c-viai2c-wmt.o i2c-viai2c-common.o
-obj-$(CONFIG_I2C_WMT) += i2c-wmt.o
+obj-$(CONFIG_I2C_WMT) += i2c-viai2c-wmt.o i2c-viai2c-common.o
i2c-octeon-objs := i2c-octeon-core.o i2c-octeon-platdrv.o
obj-$(CONFIG_I2C_OCTEON) += i2c-octeon.o
i2c-thunderx-objs := i2c-octeon-core.o i2c-thunderx-pcidrv.o
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 56a4dabf5a38..4ad670a80a63 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -431,8 +431,8 @@ static int ocores_init(struct device *dev, struct ocores_i2c *i2c)
oc_setreg(i2c, OCI2C_PREHIGH, prescale >> 8);
/* Init the device */
- oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_IACK);
oc_setreg(i2c, OCI2C_CONTROL, ctrl | OCI2C_CTRL_EN);
+ oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_IACK);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index a12525b3186b..f448505d5468 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -15,7 +15,6 @@
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/i2c.h>
-#include <linux/timer.h>
#include <linux/completion.h>
#include <linux/platform_device.h>
#include <linux/io.h>
@@ -32,7 +31,6 @@ struct i2c_pnx_mif {
int ret; /* Return value */
int mode; /* Interface mode */
struct completion complete; /* I/O completion */
- struct timer_list timer; /* Timeout */
u8 * buf; /* Data buffer */
int len; /* Length of data buffer */
int order; /* RX Bytes to order via TX */
@@ -117,24 +115,6 @@ static inline int wait_reset(struct i2c_pnx_algo_data *data)
return (timeout <= 0);
}
-static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data)
-{
- struct timer_list *timer = &alg_data->mif.timer;
- unsigned long expires = msecs_to_jiffies(alg_data->timeout);
-
- if (expires <= 1)
- expires = 2;
-
- del_timer_sync(timer);
-
- dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n",
- jiffies, expires);
-
- timer->expires = jiffies + expires;
-
- add_timer(timer);
-}
-
/**
* i2c_pnx_start - start a device
* @slave_addr: slave address
@@ -259,8 +239,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
~(mcntrl_afie | mcntrl_naie | mcntrl_drmie),
I2C_REG_CTL(alg_data));
- del_timer_sync(&alg_data->mif.timer);
-
dev_dbg(&alg_data->adapter.dev,
"%s(): Waking up xfer routine.\n",
__func__);
@@ -276,8 +254,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
~(mcntrl_afie | mcntrl_naie | mcntrl_drmie),
I2C_REG_CTL(alg_data));
- /* Stop timer. */
- del_timer_sync(&alg_data->mif.timer);
dev_dbg(&alg_data->adapter.dev,
"%s(): Waking up xfer routine after zero-xfer.\n",
__func__);
@@ -364,8 +340,6 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data)
mcntrl_drmie | mcntrl_daie);
iowrite32(ctl, I2C_REG_CTL(alg_data));
- /* Kill timer. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
}
}
@@ -400,8 +374,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
mcntrl_drmie);
iowrite32(ctl, I2C_REG_CTL(alg_data));
- /* Stop timer, to prevent timeout. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
} else if (stat & mstatus_nai) {
/* Slave did not acknowledge, generate a STOP */
@@ -419,8 +391,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
/* Our return value. */
alg_data->mif.ret = -EIO;
- /* Stop timer, to prevent timeout. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
} else {
/*
@@ -453,9 +423,8 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void i2c_pnx_timeout(struct timer_list *t)
+static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data)
{
- struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer);
u32 ctl;
dev_err(&alg_data->adapter.dev,
@@ -472,7 +441,6 @@ static void i2c_pnx_timeout(struct timer_list *t)
iowrite32(ctl, I2C_REG_CTL(alg_data));
wait_reset(alg_data);
alg_data->mif.ret = -EIO;
- complete(&alg_data->mif.complete);
}
static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data)
@@ -514,6 +482,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
struct i2c_msg *pmsg;
int rc = 0, completed = 0, i;
struct i2c_pnx_algo_data *alg_data = adap->algo_data;
+ unsigned long time_left;
u32 stat;
dev_dbg(&alg_data->adapter.dev,
@@ -548,7 +517,6 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n",
__func__, alg_data->mif.mode, alg_data->mif.len);
- i2c_pnx_arm_timer(alg_data);
/* initialize the completion var */
init_completion(&alg_data->mif.complete);
@@ -564,7 +532,10 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
break;
/* Wait for completion */
- wait_for_completion(&alg_data->mif.complete);
+ time_left = wait_for_completion_timeout(&alg_data->mif.complete,
+ alg_data->timeout);
+ if (time_left == 0)
+ i2c_pnx_timeout(alg_data);
if (!(rc = alg_data->mif.ret))
completed++;
@@ -653,7 +624,10 @@ static int i2c_pnx_probe(struct platform_device *pdev)
alg_data->adapter.algo_data = alg_data;
alg_data->adapter.nr = pdev->id;
- alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT;
+ alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT);
+ if (alg_data->timeout <= 1)
+ alg_data->timeout = 2;
+
#ifdef CONFIG_OF
alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node);
if (pdev->dev.of_node) {
@@ -673,8 +647,6 @@ static int i2c_pnx_probe(struct platform_device *pdev)
if (IS_ERR(alg_data->clk))
return PTR_ERR(alg_data->clk);
- timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0);
-
snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name),
"%s", pdev->name);
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 828aa2ea0fe4..185a5d60f101 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -257,6 +257,14 @@ static void rcar_i2c_init(struct rcar_i2c_priv *priv)
}
}
+static void rcar_i2c_reset_slave(struct rcar_i2c_priv *priv)
+{
+ rcar_i2c_write(priv, ICSIER, 0);
+ rcar_i2c_write(priv, ICSSR, 0);
+ rcar_i2c_write(priv, ICSCR, SDBS);
+ rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */
+}
+
static int rcar_i2c_bus_barrier(struct rcar_i2c_priv *priv)
{
int ret;
@@ -875,6 +883,10 @@ static int rcar_i2c_do_reset(struct rcar_i2c_priv *priv)
{
int ret;
+ /* Don't reset if a slave instance is currently running */
+ if (priv->slave)
+ return -EISCONN;
+
ret = reset_control_reset(priv->rstc);
if (ret)
return ret;
@@ -903,10 +915,10 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap,
/* Gen3+ needs a reset. That also allows RXDMA once */
if (priv->devtype >= I2C_RCAR_GEN3) {
- priv->flags &= ~ID_P_NO_RXDMA;
ret = rcar_i2c_do_reset(priv);
if (ret)
goto out;
+ priv->flags &= ~ID_P_NO_RXDMA;
}
rcar_i2c_init(priv);
@@ -1033,11 +1045,8 @@ static int rcar_unreg_slave(struct i2c_client *slave)
/* ensure no irq is running before clearing ptr */
disable_irq(priv->irq);
- rcar_i2c_write(priv, ICSIER, 0);
- rcar_i2c_write(priv, ICSSR, 0);
+ rcar_i2c_reset_slave(priv);
enable_irq(priv->irq);
- rcar_i2c_write(priv, ICSCR, SDBS);
- rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */
priv->slave = NULL;
@@ -1152,7 +1161,9 @@ static int rcar_i2c_probe(struct platform_device *pdev)
goto out_pm_disable;
}
- rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */
+ /* Bring hardware to known state */
+ rcar_i2c_init(priv);
+ rcar_i2c_reset_slave(priv);
if (priv->devtype < I2C_RCAR_GEN3) {
irqflags |= IRQF_NO_THREAD;
@@ -1168,6 +1179,7 @@ static int rcar_i2c_probe(struct platform_device *pdev)
if (of_property_read_bool(dev->of_node, "smbus"))
priv->flags |= ID_P_HOST_NOTIFY;
+ /* R-Car Gen3+ needs a reset before every transfer */
if (priv->devtype >= I2C_RCAR_GEN3) {
priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
if (IS_ERR(priv->rstc)) {
@@ -1178,6 +1190,9 @@ static int rcar_i2c_probe(struct platform_device *pdev)
ret = reset_control_status(priv->rstc);
if (ret < 0)
goto out_pm_put;
+
+ /* hard reset disturbs HostNotify local target, so disable it */
+ priv->flags &= ~ID_P_HOST_NOTIFY;
}
ret = platform_get_irq(pdev, 0);
diff --git a/drivers/i2c/busses/i2c-viai2c-common.c b/drivers/i2c/busses/i2c-viai2c-common.c
index 1844d13f1f79..162b31306cba 100644
--- a/drivers/i2c/busses/i2c-viai2c-common.c
+++ b/drivers/i2c/busses/i2c-viai2c-common.c
@@ -17,6 +17,7 @@ int viai2c_wait_bus_not_busy(struct viai2c *i2c)
return 0;
}
+EXPORT_SYMBOL_GPL(viai2c_wait_bus_not_busy);
static int viai2c_write(struct viai2c *i2c, struct i2c_msg *pmsg, int last)
{
@@ -121,6 +122,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
return (ret < 0) ? ret : i;
}
+EXPORT_SYMBOL_GPL(viai2c_xfer);
/*
* Main process of the byte mode xfer
@@ -130,7 +132,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
* 0: there is still data that needs to be transferred
* -EIO: error occurred
*/
-static int viai2c_irq_xfer(struct viai2c *i2c)
+int viai2c_irq_xfer(struct viai2c *i2c)
{
u16 val;
struct i2c_msg *msg = i2c->msg;
@@ -171,51 +173,11 @@ static int viai2c_irq_xfer(struct viai2c *i2c)
return i2c->xfered_len == msg->len;
}
-
-int __weak viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
-{
- return 0;
-}
-
-static irqreturn_t viai2c_isr(int irq, void *data)
-{
- struct viai2c *i2c = data;
- u8 status;
-
- /* save the status and write-clear it */
- status = readw(i2c->base + VIAI2C_REG_ISR);
- if (!status && i2c->platform == VIAI2C_PLAT_ZHAOXIN)
- return IRQ_NONE;
-
- writew(status, i2c->base + VIAI2C_REG_ISR);
-
- i2c->ret = 0;
- if (status & VIAI2C_ISR_NACK_ADDR)
- i2c->ret = -EIO;
-
- if (i2c->platform == VIAI2C_PLAT_WMT && (status & VIAI2C_ISR_SCL_TIMEOUT))
- i2c->ret = -ETIMEDOUT;
-
- if (!i2c->ret) {
- if (i2c->mode == VIAI2C_BYTE_MODE)
- i2c->ret = viai2c_irq_xfer(i2c);
- else
- i2c->ret = viai2c_fifo_irq_xfer(i2c, true);
- }
-
- /* All the data has been successfully transferred or error occurred */
- if (i2c->ret)
- complete(&i2c->complete);
-
- return IRQ_HANDLED;
-}
+EXPORT_SYMBOL_GPL(viai2c_irq_xfer);
int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat)
{
- int err;
- int irq_flags;
struct viai2c *i2c;
- struct device_node *np = pdev->dev.of_node;
i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
if (!i2c)
@@ -225,28 +187,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat)
if (IS_ERR(i2c->base))
return PTR_ERR(i2c->base);
- if (plat == VIAI2C_PLAT_WMT) {
- irq_flags = 0;
- i2c->irq = irq_of_parse_and_map(np, 0);
- if (!i2c->irq)
- return -EINVAL;
- } else if (plat == VIAI2C_PLAT_ZHAOXIN) {
- irq_flags = IRQF_SHARED;
- i2c->irq = platform_get_irq(pdev, 0);
- if (i2c->irq < 0)
- return i2c->irq;
- } else {
- return dev_err_probe(&pdev->dev, -EINVAL, "wrong platform type\n");
- }
-
i2c->platform = plat;
- err = devm_request_irq(&pdev->dev, i2c->irq, viai2c_isr,
- irq_flags, pdev->name, i2c);
- if (err)
- return dev_err_probe(&pdev->dev, err,
- "failed to request irq %i\n", i2c->irq);
-
i2c->dev = &pdev->dev;
init_completion(&i2c->complete);
platform_set_drvdata(pdev, i2c);
@@ -254,3 +196,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat)
*pi2c = i2c;
return 0;
}
+EXPORT_SYMBOL_GPL(viai2c_init);
+
+MODULE_DESCRIPTION("Via/Wondermedia/Zhaoxin I2C master-mode bus adapter");
+MODULE_AUTHOR("Tony Prisk <[email protected]>");
+MODULE_LICENSE("GPL");
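
These EXPORT_SYMBOL_GPL()/MODULE_* additions pair with the Makefile change earlier in this series: i2c-viai2c-common.o is no longer linked into two composite modules but built once as a module shared by the WMT and Zhaoxin drivers, so every cross-module entry point must be exported and the file needs its own module metadata. A stripped-down sketch with hypothetical names:

/* shared-core.c — helper module consumed by two platform drivers (sketch) */
#include <linux/module.h>

int shared_core_wait_idle(void)
{
	/* ...poll hardware state... */
	return 0;
}
EXPORT_SYMBOL_GPL(shared_core_wait_idle);	/* visible to the other modules */

/* Without these, modpost warns about the missing description/license. */
MODULE_DESCRIPTION("Shared core helpers (example)");
MODULE_LICENSE("GPL");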
diff --git a/drivers/i2c/busses/i2c-viai2c-common.h b/drivers/i2c/busses/i2c-viai2c-common.h
index 81e827c54434..00f17733223c 100644
--- a/drivers/i2c/busses/i2c-viai2c-common.h
+++ b/drivers/i2c/busses/i2c-viai2c-common.h
@@ -80,6 +80,6 @@ struct viai2c {
int viai2c_wait_bus_not_busy(struct viai2c *i2c);
int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num);
int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat);
-int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq);
+int viai2c_irq_xfer(struct viai2c *i2c);
#endif
diff --git a/drivers/i2c/busses/i2c-viai2c-wmt.c b/drivers/i2c/busses/i2c-viai2c-wmt.c
index e1988f946026..420fd10fe3aa 100644
--- a/drivers/i2c/busses/i2c-viai2c-wmt.c
+++ b/drivers/i2c/busses/i2c-viai2c-wmt.c
@@ -72,6 +72,32 @@ static int wmt_i2c_reset_hardware(struct viai2c *i2c)
return 0;
}
+static irqreturn_t wmt_i2c_isr(int irq, void *data)
+{
+ struct viai2c *i2c = data;
+ u8 status;
+
+ /* save the status and write-clear it */
+ status = readw(i2c->base + VIAI2C_REG_ISR);
+ writew(status, i2c->base + VIAI2C_REG_ISR);
+
+ i2c->ret = 0;
+ if (status & VIAI2C_ISR_NACK_ADDR)
+ i2c->ret = -EIO;
+
+ if (status & VIAI2C_ISR_SCL_TIMEOUT)
+ i2c->ret = -ETIMEDOUT;
+
+ if (!i2c->ret)
+ i2c->ret = viai2c_irq_xfer(i2c);
+
+ /* All the data has been successfully transferred or error occurred */
+ if (i2c->ret)
+ complete(&i2c->complete);
+
+ return IRQ_HANDLED;
+}
+
static int wmt_i2c_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -84,6 +110,16 @@ static int wmt_i2c_probe(struct platform_device *pdev)
if (err)
return err;
+ i2c->irq = platform_get_irq(pdev, 0);
+ if (i2c->irq < 0)
+ return i2c->irq;
+
+ err = devm_request_irq(&pdev->dev, i2c->irq, wmt_i2c_isr,
+ 0, pdev->name, i2c);
+ if (err)
+ return dev_err_probe(&pdev->dev, err,
+ "failed to request irq %i\n", i2c->irq);
+
i2c->clk = of_clk_get(np, 0);
if (IS_ERR(i2c->clk)) {
dev_err(&pdev->dev, "unable to request clock\n");
diff --git a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c
index 7e3ac2a3e1fd..ab3e44e147e9 100644
--- a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c
+++ b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c
@@ -49,8 +49,7 @@ struct viai2c_zhaoxin {
u16 xfer_len;
};
-/* 'irq == true' means in interrupt context */
-int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
+static int viai2c_fifo_xfer(struct viai2c *i2c)
{
u16 i;
u8 tmp;
@@ -59,17 +58,6 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
bool read = !!(msg->flags & I2C_M_RD);
struct viai2c_zhaoxin *priv = i2c->pltfm_priv;
- if (irq) {
- /* get the received data */
- if (read)
- for (i = 0; i < priv->xfer_len; i++)
- msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR);
-
- i2c->xfered_len += priv->xfer_len;
- if (i2c->xfered_len == msg->len)
- return 1;
- }
-
/* reset fifo buffer */
tmp = ioread8(base + ZXI2C_REG_HCR);
iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR);
@@ -92,18 +80,59 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
iowrite8(tmp, base + VIAI2C_REG_CR);
}
- if (irq) {
- /* continue transmission */
- tmp = ioread8(base + VIAI2C_REG_CR);
- iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR);
+ u16 tcr_val = i2c->tcr;
+
+ /* start transmission */
+ tcr_val |= read ? VIAI2C_TCR_READ : 0;
+ writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR);
+
+ return 0;
+}
+
+static int viai2c_fifo_irq_xfer(struct viai2c *i2c)
+{
+ u16 i;
+ u8 tmp;
+ struct i2c_msg *msg = i2c->msg;
+ void __iomem *base = i2c->base;
+ bool read = !!(msg->flags & I2C_M_RD);
+ struct viai2c_zhaoxin *priv = i2c->pltfm_priv;
+
+ /* get the received data */
+ if (read)
+ for (i = 0; i < priv->xfer_len; i++)
+ msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR);
+
+ i2c->xfered_len += priv->xfer_len;
+ if (i2c->xfered_len == msg->len)
+ return 1;
+
+ /* reset fifo buffer */
+ tmp = ioread8(base + ZXI2C_REG_HCR);
+ iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR);
+
+ /* set xfer len */
+ priv->xfer_len = min_t(u16, msg->len - i2c->xfered_len, ZXI2C_FIFO_SIZE);
+ if (read) {
+ iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HRLR);
} else {
- u16 tcr_val = i2c->tcr;
+ iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HTLR);
+ /* set write data */
+ for (i = 0; i < priv->xfer_len; i++)
+ iowrite8(msg->buf[i2c->xfered_len + i], base + ZXI2C_REG_HTDR);
+ }
- /* start transmission */
- tcr_val |= read ? VIAI2C_TCR_READ : 0;
- writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR);
+ /* prepare to stop transmission */
+ if (priv->hrv && msg->len == (i2c->xfered_len + priv->xfer_len)) {
+ tmp = ioread8(base + VIAI2C_REG_CR);
+ tmp |= read ? VIAI2C_CR_RX_END : VIAI2C_CR_TX_END;
+ iowrite8(tmp, base + VIAI2C_REG_CR);
}
+ /* continue transmission */
+ tmp = ioread8(base + VIAI2C_REG_CR);
+ iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR);
+
return 0;
}
@@ -135,7 +164,7 @@ static int zxi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int
priv->xfer_len = 0;
i2c->xfered_len = 0;
- viai2c_fifo_irq_xfer(i2c, 0);
+ viai2c_fifo_xfer(i2c);
if (!wait_for_completion_timeout(&i2c->complete, VIAI2C_TIMEOUT))
return -ETIMEDOUT;
@@ -228,6 +257,36 @@ static void zxi2c_get_bus_speed(struct viai2c *i2c)
dev_info(i2c->dev, "speed mode is %s\n", i2c_freq_mode_string(params[0]));
}
+static irqreturn_t zxi2c_isr(int irq, void *data)
+{
+ struct viai2c *i2c = data;
+ u8 status;
+
+ /* save the status and write-clear it */
+ status = readw(i2c->base + VIAI2C_REG_ISR);
+ if (!status)
+ return IRQ_NONE;
+
+ writew(status, i2c->base + VIAI2C_REG_ISR);
+
+ i2c->ret = 0;
+ if (status & VIAI2C_ISR_NACK_ADDR)
+ i2c->ret = -EIO;
+
+ if (!i2c->ret) {
+ if (i2c->mode == VIAI2C_BYTE_MODE)
+ i2c->ret = viai2c_irq_xfer(i2c);
+ else
+ i2c->ret = viai2c_fifo_irq_xfer(i2c);
+ }
+
+ /* All the data has been successfully transferred or error occurred */
+ if (i2c->ret)
+ complete(&i2c->complete);
+
+ return IRQ_HANDLED;
+}
+
static int zxi2c_probe(struct platform_device *pdev)
{
int error;
@@ -239,6 +298,16 @@ static int zxi2c_probe(struct platform_device *pdev)
if (error)
return error;
+ i2c->irq = platform_get_irq(pdev, 0);
+ if (i2c->irq < 0)
+ return i2c->irq;
+
+ error = devm_request_irq(&pdev->dev, i2c->irq, zxi2c_isr,
+ IRQF_SHARED, pdev->name, i2c);
+ if (error)
+ return dev_err_probe(&pdev->dev, error,
+ "failed to request irq %i\n", i2c->irq);
+
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index db0d1ac82910..7e7b15440832 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1067,6 +1067,7 @@ EXPORT_SYMBOL(i2c_find_device_by_fwnode);
static const struct i2c_device_id dummy_id[] = {
{ "dummy", 0 },
+ { "smbus_host_notify", 0 },
{ },
};
diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c
index a49642bbae4b..23a11e4e9256 100644
--- a/drivers/i2c/i2c-slave-testunit.c
+++ b/drivers/i2c/i2c-slave-testunit.c
@@ -118,9 +118,19 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client,
queue_delayed_work(system_long_wq, &tu->worker,
msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY]));
}
- fallthrough;
+
+ /*
+	 * Reset reg_idx to prevent the work from being queued again if a
+	 * STOP follows a subsequent read message. But do not clear the TU
+	 * regs here because the workqueue still needs them!
+ */
+ tu->reg_idx = 0;
+ break;
case I2C_SLAVE_WRITE_REQUESTED:
+ if (test_bit(TU_FLAG_IN_PROCESS, &tu->flags))
+ return -EBUSY;
+
memset(tu->regs, 0, TU_NUM_REGS);
tu->reg_idx = 0;
break;
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index c2da5066e9a7..80b57d3ee3a7 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -330,6 +330,8 @@ config DMARD10
config FXLS8962AF
tristate
depends on I2C || !I2C # cannot be built-in for modular I2C
+ select IIO_BUFFER
+ select IIO_KFIFO_BUF
config FXLS8962AF_I2C
tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver"
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 353a97f9c086..13ea8a1073d2 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -157,6 +157,8 @@ static int ad7266_read_raw(struct iio_dev *indio_dev,
ret = ad7266_read_single(st, val, chan->address);
iio_device_release_direct_mode(indio_dev);
+ if (ret < 0)
+ return ret;
*val = (*val >> 2) & 0xfff;
if (chan->scan_type.sign == 's')
*val = sign_extend32(*val,
diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c
index f0b71a1220e0..f52abf759260 100644
--- a/drivers/iio/adc/xilinx-ams.c
+++ b/drivers/iio/adc/xilinx-ams.c
@@ -414,8 +414,12 @@ static void ams_enable_channel_sequence(struct iio_dev *indio_dev)
/* Run calibration of PS & PL as part of the sequence */
scan_mask = BIT(0) | BIT(AMS_PS_SEQ_MAX);
- for (i = 0; i < indio_dev->num_channels; i++)
- scan_mask |= BIT_ULL(indio_dev->channels[i].scan_index);
+ for (i = 0; i < indio_dev->num_channels; i++) {
+ const struct iio_chan_spec *chan = &indio_dev->channels[i];
+
+ if (chan->scan_index < AMS_CTRL_SEQ_BASE)
+ scan_mask |= BIT_ULL(chan->scan_index);
+ }
if (ams->ps_base) {
/* put sysmon in a soft reset to change the sequence */
diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h
index 4edc5d21cb9f..f959252a4fe6 100644
--- a/drivers/iio/chemical/bme680.h
+++ b/drivers/iio/chemical/bme680.h
@@ -54,7 +54,9 @@
#define BME680_NB_CONV_MASK GENMASK(3, 0)
#define BME680_REG_MEAS_STAT_0 0x1D
+#define BME680_NEW_DATA_BIT BIT(7)
#define BME680_GAS_MEAS_BIT BIT(6)
+#define BME680_MEAS_BIT BIT(5)
/* Calibration Parameters */
#define BME680_T2_LSB_REG 0x8A
diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c
index ef5e0e46fd34..500f56834b01 100644
--- a/drivers/iio/chemical/bme680_core.c
+++ b/drivers/iio/chemical/bme680_core.c
@@ -10,6 +10,7 @@
*/
#include <linux/acpi.h>
#include <linux/bitfield.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/log2.h>
@@ -38,7 +39,7 @@ struct bme680_calib {
s8 par_h3;
s8 par_h4;
s8 par_h5;
- s8 par_h6;
+ u8 par_h6;
s8 par_h7;
s8 par_gh1;
s16 par_gh2;
@@ -342,10 +343,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data,
if (!calib->par_t2)
bme680_read_calib(data, calib);
- var1 = (adc_temp >> 3) - (calib->par_t1 << 1);
+ var1 = (adc_temp >> 3) - ((s32)calib->par_t1 << 1);
var2 = (var1 * calib->par_t2) >> 11;
var3 = ((var1 >> 1) * (var1 >> 1)) >> 12;
- var3 = (var3 * (calib->par_t3 << 4)) >> 14;
+ var3 = (var3 * ((s32)calib->par_t3 << 4)) >> 14;
data->t_fine = var2 + var3;
calc_temp = (data->t_fine * 5 + 128) >> 8;
@@ -368,9 +369,9 @@ static u32 bme680_compensate_press(struct bme680_data *data,
var1 = (data->t_fine >> 1) - 64000;
var2 = ((((var1 >> 2) * (var1 >> 2)) >> 11) * calib->par_p6) >> 2;
var2 = var2 + (var1 * calib->par_p5 << 1);
- var2 = (var2 >> 2) + (calib->par_p4 << 16);
+ var2 = (var2 >> 2) + ((s32)calib->par_p4 << 16);
var1 = (((((var1 >> 2) * (var1 >> 2)) >> 13) *
- (calib->par_p3 << 5)) >> 3) +
+ ((s32)calib->par_p3 << 5)) >> 3) +
((calib->par_p2 * var1) >> 1);
var1 = var1 >> 18;
var1 = ((32768 + var1) * calib->par_p1) >> 15;
@@ -388,7 +389,7 @@ static u32 bme680_compensate_press(struct bme680_data *data,
var3 = ((press_comp >> 8) * (press_comp >> 8) *
(press_comp >> 8) * calib->par_p10) >> 17;
- press_comp += (var1 + var2 + var3 + (calib->par_p7 << 7)) >> 4;
+ press_comp += (var1 + var2 + var3 + ((s32)calib->par_p7 << 7)) >> 4;
return press_comp;
}
@@ -414,7 +415,7 @@ static u32 bme680_compensate_humid(struct bme680_data *data,
(((temp_scaled * ((temp_scaled * calib->par_h5) / 100))
>> 6) / 100) + (1 << 14))) >> 10;
var3 = var1 * var2;
- var4 = calib->par_h6 << 7;
+ var4 = (s32)calib->par_h6 << 7;
var4 = (var4 + ((temp_scaled * calib->par_h7) / 100)) >> 4;
var5 = ((var3 >> 14) * (var3 >> 14)) >> 10;
var6 = (var4 * var5) >> 1;
@@ -532,6 +533,43 @@ static u8 bme680_oversampling_to_reg(u8 val)
return ilog2(val) + 1;
}
+/*
+ * Taken from Bosch BME680 API:
+ * https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x.c#L490
+ */
+static int bme680_wait_for_eoc(struct bme680_data *data)
+{
+ struct device *dev = regmap_get_device(data->regmap);
+ unsigned int check;
+ int ret;
+ /*
+ * (Sum of oversampling ratios * time per oversampling) +
+	 * TPH measurement + gas measurement + forced-mode transition wait
+	 * + heater duration
+ */
+ int wait_eoc_us = ((data->oversampling_temp + data->oversampling_press +
+ data->oversampling_humid) * 1936) + (477 * 4) +
+ (477 * 5) + 1000 + (data->heater_dur * 1000);
+
+ usleep_range(wait_eoc_us, wait_eoc_us + 100);
+
+ ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check);
+ if (ret) {
+ dev_err(dev, "failed to read measurement status register.\n");
+ return ret;
+ }
+ if (check & BME680_MEAS_BIT) {
+ dev_err(dev, "Device measurement cycle incomplete.\n");
+ return -EBUSY;
+ }
+ if (!(check & BME680_NEW_DATA_BIT)) {
+ dev_err(dev, "No new data available from the device.\n");
+ return -ENODATA;
+ }
+
+ return 0;
+}
+
static int bme680_chip_config(struct bme680_data *data)
{
struct device *dev = regmap_get_device(data->regmap);
@@ -622,6 +660,10 @@ static int bme680_read_temp(struct bme680_data *data, int *val)
if (ret < 0)
return ret;
+ ret = bme680_wait_for_eoc(data);
+ if (ret)
+ return ret;
+
ret = regmap_bulk_read(data->regmap, BME680_REG_TEMP_MSB,
&tmp, 3);
if (ret < 0) {
@@ -678,7 +720,7 @@ static int bme680_read_press(struct bme680_data *data,
}
*val = bme680_compensate_press(data, adc_press);
- *val2 = 100;
+ *val2 = 1000;
return IIO_VAL_FRACTIONAL;
}
@@ -738,6 +780,10 @@ static int bme680_read_gas(struct bme680_data *data,
if (ret < 0)
return ret;
+ ret = bme680_wait_for_eoc(data);
+ if (ret)
+ return ret;
+
ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check);
if (check & BME680_GAS_MEAS_BIT) {
dev_err(dev, "gas measurement incomplete\n");
diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index 3c2bf620f00f..ee0d9798d8b4 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -133,7 +133,7 @@ config AD5624R_SPI
config AD9739A
tristate "Analog Devices AD9739A RF DAC spi driver"
- depends on SPI || COMPILE_TEST
+ depends on SPI
select REGMAP_SPI
select IIO_BACKEND
help
diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c
index cdc4789213ba..a82dcc3da421 100644
--- a/drivers/iio/humidity/hdc3020.c
+++ b/drivers/iio/humidity/hdc3020.c
@@ -19,6 +19,7 @@
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/math64.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pm.h>
@@ -66,8 +67,10 @@
#define HDC3020_CRC8_POLYNOMIAL 0x31
-#define HDC3020_MIN_TEMP -40
-#define HDC3020_MAX_TEMP 125
+#define HDC3020_MIN_TEMP_MICRO -39872968
+#define HDC3020_MAX_TEMP_MICRO 124875639
+#define HDC3020_MAX_TEMP_HYST_MICRO 164748607
+#define HDC3020_MAX_HUM_MICRO 99220264
struct hdc3020_data {
struct i2c_client *client;
@@ -368,6 +371,105 @@ static int hdc3020_write_raw(struct iio_dev *indio_dev,
return -EINVAL;
}
+static int hdc3020_thresh_get_temp(u16 thresh)
+{
+ int temp;
+
+ /*
+ * Get the temperature threshold from 9 LSBs, shift them to get
+ * the truncated temperature threshold representation and
+ * calculate the threshold according to the formula in the
+	 * datasheet. Result is in degrees celsius scaled by 65535.
+ */
+ temp = FIELD_GET(HDC3020_THRESH_TEMP_MASK, thresh) <<
+ HDC3020_THRESH_TEMP_TRUNC_SHIFT;
+
+ return -2949075 + (175 * temp);
+}
+
+static int hdc3020_thresh_get_hum(u16 thresh)
+{
+ int hum;
+
+ /*
+ * Get the humidity threshold from 7 MSBs, shift them to get the
+ * truncated humidity threshold representation and calculate the
+	 * threshold according to the formula in the datasheet. Result is
+	 * in percent scaled by 65535.
+ */
+ hum = FIELD_GET(HDC3020_THRESH_HUM_MASK, thresh) <<
+ HDC3020_THRESH_HUM_TRUNC_SHIFT;
+
+ return hum * 100;
+}
+
+static u16 hdc3020_thresh_set_temp(int s_temp, u16 curr_thresh)
+{
+ u64 temp;
+ u16 thresh;
+
+ /*
+ * Calculate temperature threshold, shift it down to get the
+	 * truncated threshold representation in the 9 LSBs while keeping
+ * the current humidity threshold in the 7 MSBs.
+ */
+ temp = (u64)(s_temp + 45000000) * 65535ULL;
+ temp = div_u64(temp, 1000000 * 175) >> HDC3020_THRESH_TEMP_TRUNC_SHIFT;
+ thresh = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, temp);
+ thresh |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, curr_thresh) <<
+ HDC3020_THRESH_HUM_TRUNC_SHIFT);
+
+ return thresh;
+}
+
+static u16 hdc3020_thresh_set_hum(int s_hum, u16 curr_thresh)
+{
+ u64 hum;
+ u16 thresh;
+
+ /*
+ * Calculate humidity threshold, shift it down and up to get the
+	 * truncated threshold representation in the 7 MSBs while keeping
+ * the current temperature threshold in the 9 LSBs.
+ */
+ hum = (u64)(s_hum) * 65535ULL;
+ hum = div_u64(hum, 1000000 * 100) >> HDC3020_THRESH_HUM_TRUNC_SHIFT;
+ thresh = FIELD_PREP(HDC3020_THRESH_HUM_MASK, hum);
+ thresh |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, curr_thresh);
+
+ return thresh;
+}
+
+static
+int hdc3020_thresh_clr(s64 s_thresh, s64 s_hyst, enum iio_event_direction dir)
+{
+ s64 s_clr;
+
+ /*
+	 * Include directions when calculating the clear value,
+ * since hysteresis is unsigned by definition and the
+ * clear value is an absolute value which is signed.
+ */
+ if (dir == IIO_EV_DIR_RISING)
+ s_clr = s_thresh - s_hyst;
+ else
+ s_clr = s_thresh + s_hyst;
+
+ /* Divide by 65535 to get units of micro */
+ return div_s64(s_clr, 65535);
+}
+
+static int _hdc3020_write_thresh(struct hdc3020_data *data, u16 reg, u16 val)
+{
+ u8 buf[5];
+
+ put_unaligned_be16(reg, buf);
+ put_unaligned_be16(val, buf + 2);
+ buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE);
+
+ return hdc3020_write_bytes(data, buf, 5);
+}
+
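
A worked round-trip through the two temperature helpers above, assuming HDC3020_THRESH_TEMP_TRUNC_SHIFT is 7 (i.e. the 9-bit field keeps the top 9 of 16 bits) and a hypothetical 10.5 degC input:

/*
 * encode: hdc3020_thresh_set_temp(10500000, 0)
 *         (10500000 + 45000000) * 65535 / (1000000 * 175) = 20783
 *         20783 >> 7 -> 9-bit field value 162
 * decode: hdc3020_thresh_get_temp(162)
 *         -2949075 + 175 * (162 << 7) = 679725
 *         679725 / 65535 ~= 10.37 degC
 * The ~0.13 degC loss is the truncation to the 9 stored bits.
 */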
static int hdc3020_write_thresh(struct iio_dev *indio_dev,
const struct iio_chan_spec *chan,
enum iio_event_type type,
@@ -376,67 +478,126 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
int val, int val2)
{
struct hdc3020_data *data = iio_priv(indio_dev);
- u8 buf[5];
- u64 tmp;
- u16 reg;
- int ret;
-
- /* Supported temperature range is from –40 to 125 degree celsius */
- if (val < HDC3020_MIN_TEMP || val > HDC3020_MAX_TEMP)
- return -EINVAL;
-
- /* Select threshold register */
- if (info == IIO_EV_INFO_VALUE) {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_S_T_RH_THRESH_HIGH;
- else
- reg = HDC3020_S_T_RH_THRESH_LOW;
+ u16 reg, reg_val, reg_thresh_rd, reg_clr_rd, reg_thresh_wr, reg_clr_wr;
+ s64 s_thresh, s_hyst, s_clr;
+ int s_val, thresh, clr, ret;
+
+ /* Select threshold registers */
+ if (dir == IIO_EV_DIR_RISING) {
+ reg_thresh_rd = HDC3020_R_T_RH_THRESH_HIGH;
+ reg_thresh_wr = HDC3020_S_T_RH_THRESH_HIGH;
+ reg_clr_rd = HDC3020_R_T_RH_THRESH_HIGH_CLR;
+ reg_clr_wr = HDC3020_S_T_RH_THRESH_HIGH_CLR;
} else {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_S_T_RH_THRESH_HIGH_CLR;
- else
- reg = HDC3020_S_T_RH_THRESH_LOW_CLR;
+ reg_thresh_rd = HDC3020_R_T_RH_THRESH_LOW;
+ reg_thresh_wr = HDC3020_S_T_RH_THRESH_LOW;
+ reg_clr_rd = HDC3020_R_T_RH_THRESH_LOW_CLR;
+ reg_clr_wr = HDC3020_S_T_RH_THRESH_LOW_CLR;
}
guard(mutex)(&data->lock);
- ret = hdc3020_read_be16(data, reg);
+ ret = hdc3020_read_be16(data, reg_thresh_rd);
+ if (ret < 0)
+ return ret;
+
+ thresh = ret;
+ ret = hdc3020_read_be16(data, reg_clr_rd);
if (ret < 0)
return ret;
+ clr = ret;
+ /* Scale the value to include the decimal part in the calculation */
+ s_val = (val < 0) ? (val * 1000000 - val2) : (val * 1000000 + val2);
switch (chan->type) {
case IIO_TEMP:
- /*
- * Calculate temperature threshold, shift it down to get the
- * truncated threshold representation in the 9LSBs while keeping
- * the current humidity threshold in the 7 MSBs.
- */
- tmp = ((u64)(((val + 45) * MICRO) + val2)) * 65535ULL;
- tmp = div_u64(tmp, MICRO * 175);
- val = tmp >> HDC3020_THRESH_TEMP_TRUNC_SHIFT;
- val = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, val);
- val |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, ret) <<
- HDC3020_THRESH_HUM_TRUNC_SHIFT);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ s_val = max(s_val, HDC3020_MIN_TEMP_MICRO);
+ s_val = min(s_val, HDC3020_MAX_TEMP_MICRO);
+ reg = reg_thresh_wr;
+ reg_val = hdc3020_thresh_set_temp(s_val, thresh);
+ ret = _hdc3020_write_thresh(data, reg, reg_val);
+ if (ret < 0)
+ return ret;
+
+ /* Calculate old hysteresis */
+ s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000;
+ s_clr = (s64)hdc3020_thresh_get_temp(clr) * 1000000;
+ s_hyst = div_s64(abs(s_thresh - s_clr), 65535);
+ /* Set new threshold */
+ thresh = reg_val;
+ /* Set old hysteresis */
+ s_val = s_hyst;
+ fallthrough;
+ case IIO_EV_INFO_HYSTERESIS:
+ /*
+ * Function hdc3020_thresh_get_temp returns temperature
+ * in degrees celsius scaled by 65535. Scale by 1000000
+ * to be able to subtract scaled hysteresis value.
+ */
+ s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000;
+ /*
+ * Units of s_val are in micro degrees celsius, scale by
+ * 65535 to get same units as s_thresh.
+ */
+ s_val = min(abs(s_val), HDC3020_MAX_TEMP_HYST_MICRO);
+ s_hyst = (s64)s_val * 65535;
+ s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir);
+ s_clr = max(s_clr, HDC3020_MIN_TEMP_MICRO);
+ s_clr = min(s_clr, HDC3020_MAX_TEMP_MICRO);
+ reg = reg_clr_wr;
+ reg_val = hdc3020_thresh_set_temp(s_clr, clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
break;
case IIO_HUMIDITYRELATIVE:
- /*
- * Calculate humidity threshold, shift it down and up to get the
- * truncated threshold representation in the 7MSBs while keeping
- * the current temperature threshold in the 9 LSBs.
- */
- tmp = ((u64)((val * MICRO) + val2)) * 65535ULL;
- tmp = div_u64(tmp, MICRO * 100);
- val = tmp >> HDC3020_THRESH_HUM_TRUNC_SHIFT;
- val = FIELD_PREP(HDC3020_THRESH_HUM_MASK, val);
- val |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret);
+ s_val = (s_val < 0) ? 0 : min(s_val, HDC3020_MAX_HUM_MICRO);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ reg = reg_thresh_wr;
+ reg_val = hdc3020_thresh_set_hum(s_val, thresh);
+ ret = _hdc3020_write_thresh(data, reg, reg_val);
+ if (ret < 0)
+ return ret;
+
+ /* Calculate old hysteresis */
+ s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000;
+ s_clr = (s64)hdc3020_thresh_get_hum(clr) * 1000000;
+ s_hyst = div_s64(abs(s_thresh - s_clr), 65535);
+ /* Set new threshold */
+ thresh = reg_val;
+ /* Try to set old hysteresis */
+ s_val = min(abs(s_hyst), HDC3020_MAX_HUM_MICRO);
+ fallthrough;
+ case IIO_EV_INFO_HYSTERESIS:
+ /*
+ * Function hdc3020_thresh_get_hum returns relative
+ * humidity in percent scaled by 65535. Scale by 1000000
+ * to be able to subtract scaled hysteresis value.
+ */
+ s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000;
+ /*
+ * Units of s_val are in micro percent, scale by 65535
+ * to get same units as s_thresh.
+ */
+ s_hyst = (s64)s_val * 65535;
+ s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir);
+ s_clr = max(s_clr, 0);
+ s_clr = min(s_clr, HDC3020_MAX_HUM_MICRO);
+ reg = reg_clr_wr;
+ reg_val = hdc3020_thresh_set_hum(s_clr, clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
break;
default:
return -EOPNOTSUPP;
}
- put_unaligned_be16(reg, buf);
- put_unaligned_be16(val, buf + 2);
- buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE);
- return hdc3020_write_bytes(data, buf, 5);
+ return _hdc3020_write_thresh(data, reg, reg_val);
}
static int hdc3020_read_thresh(struct iio_dev *indio_dev,
@@ -447,48 +608,60 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev,
int *val, int *val2)
{
struct hdc3020_data *data = iio_priv(indio_dev);
- u16 reg;
- int ret;
+ u16 reg_thresh, reg_clr;
+ int thresh, clr, ret;
- /* Select threshold register */
- if (info == IIO_EV_INFO_VALUE) {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_R_T_RH_THRESH_HIGH;
- else
- reg = HDC3020_R_T_RH_THRESH_LOW;
+ /* Select threshold registers */
+ if (dir == IIO_EV_DIR_RISING) {
+ reg_thresh = HDC3020_R_T_RH_THRESH_HIGH;
+ reg_clr = HDC3020_R_T_RH_THRESH_HIGH_CLR;
} else {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_R_T_RH_THRESH_HIGH_CLR;
- else
- reg = HDC3020_R_T_RH_THRESH_LOW_CLR;
+ reg_thresh = HDC3020_R_T_RH_THRESH_LOW;
+ reg_clr = HDC3020_R_T_RH_THRESH_LOW_CLR;
}
guard(mutex)(&data->lock);
- ret = hdc3020_read_be16(data, reg);
+ ret = hdc3020_read_be16(data, reg_thresh);
if (ret < 0)
return ret;
switch (chan->type) {
case IIO_TEMP:
- /*
- * Get the temperature threshold from 9 LSBs, shift them to get
- * the truncated temperature threshold representation and
- * calculate the threshold according to the formula in the
- * datasheet.
- */
- *val = FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret);
- *val = *val << HDC3020_THRESH_TEMP_TRUNC_SHIFT;
- *val = -2949075 + (175 * (*val));
+ thresh = hdc3020_thresh_get_temp(ret);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ *val = thresh;
+ break;
+ case IIO_EV_INFO_HYSTERESIS:
+ ret = hdc3020_read_be16(data, reg_clr);
+ if (ret < 0)
+ return ret;
+
+ clr = hdc3020_thresh_get_temp(ret);
+ *val = abs(thresh - clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
*val2 = 65535;
return IIO_VAL_FRACTIONAL;
case IIO_HUMIDITYRELATIVE:
- /*
- * Get the humidity threshold from 7 MSBs, shift them to get the
- * truncated humidity threshold representation and calculate the
- * threshold according to the formula in the datasheet.
- */
- *val = FIELD_GET(HDC3020_THRESH_HUM_MASK, ret);
- *val = (*val << HDC3020_THRESH_HUM_TRUNC_SHIFT) * 100;
+ thresh = hdc3020_thresh_get_hum(ret);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ *val = thresh;
+ break;
+ case IIO_EV_INFO_HYSTERESIS:
+ ret = hdc3020_read_be16(data, reg_clr);
+ if (ret < 0)
+ return ret;
+
+ clr = hdc3020_thresh_get_hum(ret);
+ *val = abs(thresh - clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
*val2 = 65535;
return IIO_VAL_FRACTIONAL;
default:
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index 16de57846bd9..2e84776f4fbd 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -315,7 +315,7 @@ int iio_trigger_attach_poll_func(struct iio_trigger *trig,
* this is the case if the IIO device and the trigger device share the
* same parent device.
*/
- if (iio_validate_own_trigger(pf->indio_dev, trig))
+ if (!iio_validate_own_trigger(pf->indio_dev, trig))
trig->attached_own_device = true;
return ret;
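Note that iio_validate_own_trigger() follows the usual 0-on-success convention, so attached_own_device must be set when it returns zero; the unnegated check set the flag on validation failure instead.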
diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c
index d6627b3e6000..66a063ea3db4 100644
--- a/drivers/iio/light/apds9306.c
+++ b/drivers/iio/light/apds9306.c
@@ -583,8 +583,8 @@ static int apds9306_intg_time_set(struct apds9306_data *data, int val2)
return ret;
intg_old = iio_gts_find_int_time_by_sel(&data->gts, intg_time_idx);
- if (ret < 0)
- return ret;
+ if (intg_old < 0)
+ return intg_old;
if (intg_old == val2)
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 9dca451ed522..6974922e5609 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -107,8 +107,6 @@ struct bnxt_re_gsi_context {
struct bnxt_re_sqp_entries *sqp_tbl;
};
-#define BNXT_RE_MIN_MSIX 2
-#define BNXT_RE_MAX_MSIX 9
#define BNXT_RE_AEQ_IDX 0
#define BNXT_RE_NQ_IDX 1
#define BNXT_RE_GEN_P5_MAX_VF 64
@@ -168,7 +166,7 @@ struct bnxt_re_dev {
struct bnxt_qplib_rcfw rcfw;
/* NQ */
- struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX];
/* Device Resources */
struct bnxt_qplib_dev_attr dev_attr;
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 4f13423ecdbd..887b09dd86e7 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -112,6 +112,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
"start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
start, iova, length, access_flags);
+ access_flags &= ~IB_ACCESS_OPTIONAL;
if (access_flags & ~VALID_MR_FLAGS)
return ERR_PTR(-EINVAL);
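Stripping IB_ACCESS_OPTIONAL before the VALID_MR_FLAGS check keeps optional hint bits (such as relaxed ordering) from causing a spurious -EINVAL; optional flags may be ignored by a driver rather than rejected.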
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2366c46eebc8..43660c831b22 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3759,10 +3759,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
return 0;
-err:
- mlx5r_macsec_dealloc_gids(dev);
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
+err:
+ mlx5r_macsec_dealloc_gids(dev);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ecc111ed5d86..d3c1f63791a2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -246,6 +246,7 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2,
(ent->rb_key.access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
MLX5_SET(mkc, mkc, translations_octword_size,
get_mkc_octo_size(ent->rb_key.access_mode,
@@ -641,10 +642,8 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
new = &((*new)->rb_left);
if (cmp < 0)
new = &((*new)->rb_right);
- if (cmp == 0) {
- mutex_unlock(&cache->rb_lock);
+ if (cmp == 0)
return -EEXIST;
- }
}
/* Add new node and rebalance tree. */
@@ -719,6 +718,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
}
mr->mmkey.cache_ent = ent;
mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.rb_key = ent->rb_key;
+ mr->mmkey.cacheable = true;
init_waitqueue_head(&mr->mmkey.wait);
return mr;
}
@@ -1169,7 +1170,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
mr->ibmr.pd = pd;
mr->umem = umem;
mr->page_shift = order_base_2(page_size);
- mr->mmkey.cacheable = true;
set_mr_fields(dev, mr, umem->length, access_flags, iova);
return mr;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index a056ea835da5..84be0c3d5699 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -199,17 +199,20 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
int err;
struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) /
+ sizeof(struct mlx5_wqe_data_seg);
if (init_attr->srq_type != IB_SRQT_BASIC &&
init_attr->srq_type != IB_SRQT_XRC &&
init_attr->srq_type != IB_SRQT_TM)
return -EOPNOTSUPP;
- /* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= max_srq_wqes) {
- mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
- init_attr->attr.max_wr,
- max_srq_wqes);
+ /* Sanity check SRQ and sge size before proceeding */
+ if (init_attr->attr.max_wr >= max_srq_wqes ||
+ init_attr->attr.max_sge > max_sge_sz) {
+ mlx5_ib_dbg(dev, "max_wr %d,wr_cap %d,max_sge %d, sge_cap:%d\n",
+ init_attr->attr.max_wr, max_srq_wqes,
+ init_attr->attr.max_sge, max_sge_sz);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c6a7fa3054fa..6596a85723c9 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -344,6 +344,19 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
* receive buffer later. For rdma operations additional
* length checks are performed in check_rkey.
*/
+ if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
+ unsigned int payload = payload_size(pkt);
+ unsigned int recv_buffer_len = 0;
+ int i;
+
+ for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
+ recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
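+ /* The first 40 bytes of a UD receive buffer are reserved for the GRH */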
+ if (payload + 40 > recv_buffer_len) {
+ rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
+ return RESPST_ERR_LENGTH;
+ }
+ }
+
if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
(qp_type(qp) == IB_QPT_UC))) {
unsigned int mtu = qp->mtu;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index c7d4d8ab5a09..de6238ee4379 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -812,7 +812,7 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
int i;
for (i = 0; i < ibwr->num_sge; i++, sge++) {
- memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length);
+ memcpy(p, ib_virt_dma_to_ptr(sge->addr), sge->length);
p += sge->length;
}
}
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 70f0654c58b6..2b8370ecf42a 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -209,6 +209,7 @@ static const struct xpad_device {
{ 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 },
{ 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 },
{ 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE },
+ { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE },
{ 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX },
{ 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX },
{ 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 4e38229404b4..b4723ea395eb 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1477,15 +1477,46 @@ static void elantech_disconnect(struct psmouse *psmouse)
}
/*
+ * Some hw_version 4 models fail to properly activate absolute mode on
+ * resume without going through disable/enable cycle.
+ */
+static const struct dmi_system_id elantech_needs_reenable[] = {
+#if defined(CONFIG_DMI) && defined(CONFIG_X86)
+ {
+ /* Lenovo N24 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "81AF"),
+ },
+ },
+#endif
+ { }
+};
+
+/*
* Put the touchpad back into absolute mode when reconnecting
*/
static int elantech_reconnect(struct psmouse *psmouse)
{
+ int err;
+
psmouse_reset(psmouse);
if (elantech_detect(psmouse, 0))
return -1;
+ if (dmi_check_system(elantech_needs_reenable)) {
+ err = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE);
+ if (err)
+ psmouse_warn(psmouse, "failed to deactivate mouse on %s: %d\n",
+ psmouse->ps2dev.serio->phys, err);
+
+ err = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_ENABLE);
+ if (err)
+ psmouse_warn(psmouse, "failed to reactivate mouse on %s: %d\n",
+ psmouse->ps2dev.serio->phys, err);
+ }
+
if (elantech_set_absolute_mode(psmouse)) {
psmouse_err(psmouse,
"failed to put touchpad back into absolute mode.\n");
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index dfc6c581873b..5b50475ec414 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -76,7 +76,7 @@ static inline void i8042_write_command(int val)
#define SERIO_QUIRK_PROBE_DEFER BIT(5)
#define SERIO_QUIRK_RESET_ALWAYS BIT(6)
#define SERIO_QUIRK_RESET_NEVER BIT(7)
-#define SERIO_QUIRK_DIECT BIT(8)
+#define SERIO_QUIRK_DIRECT BIT(8)
#define SERIO_QUIRK_DUMBKBD BIT(9)
#define SERIO_QUIRK_NOLOOP BIT(10)
#define SERIO_QUIRK_NOTIMEOUT BIT(11)
@@ -1332,6 +1332,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
+ {
+ /*
+ * The Ayaneo Kun is a handheld device where some of the buttons
+ * are handled by an AT keyboard. The keyboard is usually
+ * detected as raw, but sometimes, usually after a cold boot,
+ * it is detected as translated. Make sure that the keyboard
+ * is always in raw mode.
+ */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"),
+ DMI_MATCH(DMI_BOARD_NAME, "KUN"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_DIRECT)
+ },
{ }
};
@@ -1655,7 +1669,7 @@ static void __init i8042_check_quirks(void)
if (quirks & SERIO_QUIRK_RESET_NEVER)
i8042_reset = I8042_RESET_NEVER;
}
- if (quirks & SERIO_QUIRK_DIECT)
+ if (quirks & SERIO_QUIRK_DIRECT)
i8042_direct = true;
if (quirks & SERIO_QUIRK_DUMBKBD)
i8042_dumbkbd = true;
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index d2bbb436a77d..4d13db13b9e5 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1111,6 +1111,16 @@ static const struct of_device_id ads7846_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, ads7846_dt_ids);
+static const struct spi_device_id ads7846_spi_ids[] = {
+ { "tsc2046", 7846 },
+ { "ads7843", 7843 },
+ { "ads7845", 7845 },
+ { "ads7846", 7846 },
+ { "ads7873", 7873 },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, ads7846_spi_ids);
+
static const struct ads7846_platform_data *ads7846_get_props(struct device *dev)
{
struct ads7846_platform_data *pdata;
@@ -1386,10 +1396,10 @@ static struct spi_driver ads7846_driver = {
},
.probe = ads7846_probe,
.remove = ads7846_remove,
+ .id_table = ads7846_spi_ids,
};
module_spi_driver(ads7846_driver);
MODULE_DESCRIPTION("ADS7846 TouchScreen Driver");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("spi:ads7846");
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 31ffdc2a93f3..79bdb2b10949 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -261,8 +261,8 @@ static int ili251x_read_touch_data(struct i2c_client *client, u8 *data)
if (!error && data[0] == 2) {
error = i2c_master_recv(client, data + ILI251X_DATA_SIZE1,
ILI251X_DATA_SIZE2);
- if (error >= 0 && error != ILI251X_DATA_SIZE2)
- error = -EIO;
+ if (error >= 0)
+ error = error == ILI251X_DATA_SIZE2 ? 0 : -EIO;
}
return error;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 161248067776..c89d85b54a1a 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2743,6 +2743,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_set_exclusion_range(iommu);
+ iommu_enable_gt(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c2703599bb16..b19e8c0f48fa 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2061,6 +2061,12 @@ static void do_detach(struct iommu_dev_data *dev_data)
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ /* Clear DTE and flush the entry */
+ amd_iommu_dev_update_dte(dev_data, false);
+
+ /* Flush IOTLB and wait for the flushes to finish */
+ amd_iommu_domain_flush_all(domain);
+
/* Clear GCR3 table */
if (pdom_is_sva_capable(domain))
destroy_gcr3_table(dev_data, domain);
@@ -2069,12 +2075,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
dev_data->domain = NULL;
list_del(&dev_data->list);
- /* Clear DTE and flush the entry */
- amd_iommu_dev_update_dte(dev_data, false);
-
- /* Flush IOTLB and wait for the flushes to finish */
- amd_iommu_domain_flush_all(domain);
-
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
domain->dev_cnt -= 1;
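The reordering guarantees that the DTE is cleared and all flushes have completed before the GCR3 table is torn down, so the device can no longer translate through a table that is about to be freed.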
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2e9811bf2a4e..fd11a080380c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2114,12 +2114,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (ret)
return ret;
- ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
- if (ret) {
- domain_detach_iommu(domain, iommu);
- return ret;
- }
-
info->domain = domain;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
@@ -2137,15 +2131,21 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
else
ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);
- if (ret) {
- device_block_translation(dev);
- return ret;
- }
+ if (ret)
+ goto out_block_translation;
if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
iommu_enable_pci_caps(info);
+ ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
+ if (ret)
+ goto out_block_translation;
+
return 0;
+
+out_block_translation:
+ device_block_translation(dev);
+ return ret;
}
/**
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index c7ddebf312ad..b1f2080be2be 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -15,6 +15,7 @@
#include <linux/irqchip/chained_irq.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
+#include <asm/numa.h>
#define EIOINTC_REG_NODEMAP 0x14a0
#define EIOINTC_REG_IPMAP 0x14c0
@@ -339,7 +340,7 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
int node;
if (cpu_has_flatmode)
- node = cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE);
+ node = early_cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE);
else
node = eiointc_priv[nr_pics - 1]->node;
@@ -431,7 +432,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent,
goto out_free_handle;
if (cpu_has_flatmode)
- node = cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE);
+ node = early_cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE);
else
node = acpi_eiointc->node;
acpi_set_vec_parent(node, priv->eiointc_domain, pch_group);
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index e4b33aed1c97..7c4fe7ab4b83 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -28,7 +28,7 @@
#define LIOINTC_INTC_CHIP_START 0x20
-#define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20)
+#define LIOINTC_REG_INTC_STATUS(core) (LIOINTC_INTC_CHIP_START + 0x20 + (core) * 8)
#define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04)
#define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08)
#define LIOINTC_REG_INTC_DISABLE (LIOINTC_INTC_CHIP_START + 0x0c)
@@ -217,7 +217,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision,
goto out_free_priv;
for (i = 0; i < LIOINTC_NUM_CORES; i++)
- priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS;
+ priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS(i);
for (i = 0; i < LIOINTC_NUM_PARENT; i++)
priv->handler[i].parent_int_map = parent_int_map[i];
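With the parameterized macro, core N now reads its own ISR at 0x40 + 8 * N (0x40, 0x48, ...); previously every per-core pointer aliased the core-0 status register at 0x40.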
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 4d11fc664cb0..b5d6ef430b86 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -897,7 +897,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
sector_t sectors, struct block_device *cached_bdev,
const struct block_device_operations *ops)
{
- struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
SIZE_MAX / sizeof(atomic_t));
struct queue_limits lim = {
@@ -909,6 +908,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
.io_min = block_size,
.logical_block_size = block_size,
.physical_block_size = block_size,
+ .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
};
uint64_t n;
int idx;
@@ -974,13 +974,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->minors = BCACHE_MINORS;
d->disk->fops = ops;
d->disk->private_data = d;
-
- q = d->disk->queue;
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
-
- blk_queue_write_cache(q, true, true);
-
return 0;
out_bioset_exit:
@@ -1423,8 +1416,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
}
if (bdev_io_opt(dc->bdev))
- dc->partial_stripes_expensive =
- q->limits.raid_partial_stripes_expensive;
+ dc->partial_stripes_expensive = !!(q->limits.features &
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
ret = bcache_device_init(&dc->disk, block_size,
bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 16884b585053..2d8dd9283ff4 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3403,7 +3403,6 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
limits->discard_granularity = origin_limits->discard_granularity;
limits->discard_alignment = origin_limits->discard_alignment;
- limits->discard_misaligned = origin_limits->discard_misaligned;
}
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index ad79b52ffc14..b4384a8b13e3 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2059,7 +2059,6 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors;
limits->discard_granularity = dest_limits->discard_granularity;
limits->discard_alignment = dest_limits->discard_alignment;
- limits->discard_misaligned = dest_limits->discard_misaligned;
limits->max_discard_segments = dest_limits->max_discard_segments;
}
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 08700bfc3e23..14a44c0f8286 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -206,7 +206,6 @@ struct dm_table {
bool integrity_supported:1;
bool singleton:1;
- unsigned integrity_added:1;
/*
* Indicates the rw permissions for the new logical device. This
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1b7a97cc3779..6c013ceb0e5f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1176,8 +1176,8 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk);
struct mapped_device *md = dm_table_get_md(ti->table);
- /* From now we require underlying device with our integrity profile */
- if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) {
+ /* We require an underlying device with non-PI metadata */
+ if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) {
ti->error = "Integrity profile not supported.";
return -EINVAL;
}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 417fddebe367..2a89f8eb4713 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -350,25 +350,6 @@ static struct kmem_cache *journal_io_cache;
#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
#endif
-static void dm_integrity_prepare(struct request *rq)
-{
-}
-
-static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
-{
-}
-
-/*
- * DM Integrity profile, protection is performed layer above (dm-crypt)
- */
-static const struct blk_integrity_profile dm_integrity_profile = {
- .name = "DM-DIF-EXT-TAG",
- .generate_fn = NULL,
- .verify_fn = NULL,
- .prepare_fn = dm_integrity_prepare,
- .complete_fn = dm_integrity_complete,
-};
-
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);
@@ -3494,6 +3475,17 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
limits->dma_alignment = limits->logical_block_size - 1;
limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
}
+
+ if (!ic->internal_hash) {
+ struct blk_integrity *bi = &limits->integrity;
+
+ memset(bi, 0, sizeof(*bi));
+ bi->tuple_size = ic->tag_size;
+ bi->tag_size = bi->tuple_size;
+ bi->interval_exp =
+ ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
+ }
+
limits->max_integrity_segments = USHRT_MAX;
}
@@ -3650,20 +3642,6 @@ try_smaller_buffer:
return 0;
}
-static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
-{
- struct gendisk *disk = dm_disk(dm_table_get_md(ti->table));
- struct blk_integrity bi;
-
- memset(&bi, 0, sizeof(bi));
- bi.profile = &dm_integrity_profile;
- bi.tuple_size = ic->tag_size;
- bi.tag_size = bi.tuple_size;
- bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
-
- blk_integrity_register(disk, &bi);
-}
-
static void dm_integrity_free_page_list(struct page_list *pl)
{
unsigned int i;
@@ -4649,9 +4627,6 @@ try_smaller_buffer:
}
}
- if (!ic->internal_hash)
- dm_integrity_set(ti, ic);
-
ti->num_flush_bios = 1;
ti->flush_supported = true;
if (ic->discard)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index abe88d1e6735..052c00c1eb15 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3542,7 +3542,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
recovery = rs->md.recovery;
state = decipher_sync_action(mddev, recovery);
progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
- resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
+ resync_mismatches = mddev->last_sync_action == ACTION_CHECK ?
atomic64_read(&mddev->resync_mismatches) : 0;
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index b2d5246cff21..92d18dcdb3e9 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -425,6 +425,13 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
q->limits.logical_block_size,
q->limits.alignment_offset,
(unsigned long long) start << SECTOR_SHIFT);
+
+ /*
+ * Only stack the integrity profile if the target doesn't have native
+ * integrity support.
+ */
+ if (!dm_target_has_integrity(ti->type))
+ queue_limits_stack_integrity_bdev(limits, bdev);
return 0;
}
@@ -572,6 +579,12 @@ int dm_split_args(int *argc, char ***argvp, char *input)
return 0;
}
+static void dm_set_stacking_limits(struct queue_limits *limits)
+{
+ blk_set_stacking_limits(limits);
+ limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
+}
+
/*
* Impose necessary and sufficient conditions on a devices's table such
* that any incoming bio which respects its logical_block_size can be
@@ -610,7 +623,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t,
for (i = 0; i < t->num_targets; i++) {
ti = dm_table_get_target(t, i);
- blk_set_stacking_limits(&ti_limits);
+ dm_set_stacking_limits(&ti_limits);
/* combine all target devices' limits */
if (ti->type->iterate_devices)
@@ -702,9 +715,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
t->immutable_target_type = ti->type;
}
- if (dm_target_has_integrity(ti->type))
- t->integrity_added = 1;
-
ti->table = t;
ti->begin = start;
ti->len = len;
@@ -1014,14 +1024,13 @@ bool dm_table_request_based(struct dm_table *t)
return __table_type_request_based(dm_table_get_type(t));
}
-static bool dm_table_supports_poll(struct dm_table *t);
-
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
unsigned int per_io_data_size = 0, front_pad, io_front_pad;
unsigned int min_pool_size = 0, pool_size;
struct dm_md_mempools *pools;
+ unsigned int bioset_flags = 0;
if (unlikely(type == DM_TYPE_NONE)) {
DMERR("no table type is set, can't allocate mempools");
@@ -1038,6 +1047,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
goto init_bs;
}
+ if (md->queue->limits.features & BLK_FEAT_POLL)
+ bioset_flags |= BIOSET_PERCPU_CACHE;
+
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
@@ -1050,8 +1062,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
io_front_pad = roundup(per_io_data_size,
__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- if (bioset_init(&pools->io_bs, pool_size, io_front_pad,
- dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
+ if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
goto out_free_pools;
if (t->integrity_supported &&
bioset_integrity_create(&pools->io_bs, pool_size))
@@ -1119,99 +1130,6 @@ static int dm_table_build_index(struct dm_table *t)
return r;
}
-static bool integrity_profile_exists(struct gendisk *disk)
-{
- return !!blk_get_integrity(disk);
-}
-
-/*
- * Get a disk whose integrity profile reflects the table's profile.
- * Returns NULL if integrity support was inconsistent or unavailable.
- */
-static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t)
-{
- struct list_head *devices = dm_table_get_devices(t);
- struct dm_dev_internal *dd = NULL;
- struct gendisk *prev_disk = NULL, *template_disk = NULL;
-
- for (unsigned int i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = dm_table_get_target(t, i);
-
- if (!dm_target_passes_integrity(ti->type))
- goto no_integrity;
- }
-
- list_for_each_entry(dd, devices, list) {
- template_disk = dd->dm_dev->bdev->bd_disk;
- if (!integrity_profile_exists(template_disk))
- goto no_integrity;
- else if (prev_disk &&
- blk_integrity_compare(prev_disk, template_disk) < 0)
- goto no_integrity;
- prev_disk = template_disk;
- }
-
- return template_disk;
-
-no_integrity:
- if (prev_disk)
- DMWARN("%s: integrity not set: %s and %s profile mismatch",
- dm_device_name(t->md),
- prev_disk->disk_name,
- template_disk->disk_name);
- return NULL;
-}
-
-/*
- * Register the mapped device for blk_integrity support if the
- * underlying devices have an integrity profile. But all devices may
- * not have matching profiles (checking all devices isn't reliable
- * during table load because this table may use other DM device(s) which
- * must be resumed before they will have an initialized integity
- * profile). Consequently, stacked DM devices force a 2 stage integrity
- * profile validation: First pass during table load, final pass during
- * resume.
- */
-static int dm_table_register_integrity(struct dm_table *t)
-{
- struct mapped_device *md = t->md;
- struct gendisk *template_disk = NULL;
-
- /* If target handles integrity itself do not register it here. */
- if (t->integrity_added)
- return 0;
-
- template_disk = dm_table_get_integrity_disk(t);
- if (!template_disk)
- return 0;
-
- if (!integrity_profile_exists(dm_disk(md))) {
- t->integrity_supported = true;
- /*
- * Register integrity profile during table load; we can do
- * this because the final profile must match during resume.
- */
- blk_integrity_register(dm_disk(md),
- blk_get_integrity(template_disk));
- return 0;
- }
-
- /*
- * If DM device already has an initialized integrity
- * profile the new profile should not conflict.
- */
- if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
- DMERR("%s: conflict with existing integrity profile: %s profile mismatch",
- dm_device_name(t->md),
- template_disk->disk_name);
- return 1;
- }
-
- /* Preserve existing integrity profile */
- t->integrity_supported = true;
- return 0;
-}
-
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct dm_crypto_profile {
@@ -1423,12 +1341,6 @@ int dm_table_complete(struct dm_table *t)
return r;
}
- r = dm_table_register_integrity(t);
- if (r) {
- DMERR("could not register integrity profile.");
- return r;
- }
-
r = dm_table_construct_crypto_profile(t);
if (r) {
DMERR("could not construct crypto profile.");
@@ -1493,14 +1405,6 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
return &t->targets[(KEYS_PER_NODE * n) + k];
}
-static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags);
-}
-
/*
* type->iterate_devices() should be called when the sanity check needs to
* iterate and check all underlying data devices. iterate_devices() will
@@ -1548,19 +1452,6 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
-static bool dm_table_supports_poll(struct dm_table *t)
-{
- for (unsigned int i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = dm_table_get_target(t, i);
-
- if (!ti->type->iterate_devices ||
- ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
- return false;
- }
-
- return true;
-}
-
/*
* Check whether a table has no data devices attached using each
* target's iterate_devices method.
@@ -1686,12 +1577,20 @@ int dm_calculate_queue_limits(struct dm_table *t,
unsigned int zone_sectors = 0;
bool zoned = false;
- blk_set_stacking_limits(limits);
+ dm_set_stacking_limits(limits);
+
+ t->integrity_supported = true;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (!dm_target_passes_integrity(ti->type))
+ t->integrity_supported = false;
+ }
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
- blk_set_stacking_limits(&ti_limits);
+ dm_set_stacking_limits(&ti_limits);
if (!ti->type->iterate_devices) {
/* Set I/O hints portion of queue limits */
@@ -1706,12 +1605,12 @@ int dm_calculate_queue_limits(struct dm_table *t,
ti->type->iterate_devices(ti, dm_set_device_limits,
&ti_limits);
- if (!zoned && ti_limits.zoned) {
+ if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) {
/*
* After stacking all limits, validate all devices
* in table support this zoned model and zone sectors.
*/
- zoned = ti_limits.zoned;
+ zoned = (ti_limits.features & BLK_FEAT_ZONED);
zone_sectors = ti_limits.chunk_sectors;
}
@@ -1738,6 +1637,18 @@ combine_limits:
dm_device_name(t->md),
(unsigned long long) ti->begin,
(unsigned long long) ti->len);
+
+ if (t->integrity_supported ||
+ dm_target_has_integrity(ti->type)) {
+ if (!queue_limits_stack_integrity(limits, &ti_limits)) {
+ DMWARN("%s: adding target device (start sect %llu len %llu) "
+ "disabled integrity support due to incompatibility",
+ dm_device_name(t->md),
+ (unsigned long long) ti->begin,
+ (unsigned long long) ti->len);
+ t->integrity_supported = false;
+ }
+ }
}
/*
@@ -1747,12 +1658,12 @@ combine_limits:
* zoned model on host-managed zoned block devices.
* BUT...
*/
- if (limits->zoned) {
+ if (limits->features & BLK_FEAT_ZONED) {
/*
* ...IF the above limits stacking determined a zoned model
* validate that all of the table's devices conform to it.
*/
- zoned = limits->zoned;
+ zoned = limits->features & BLK_FEAT_ZONED;
zone_sectors = limits->chunk_sectors;
}
if (validate_hardware_zoned(t, zoned, zone_sectors))
@@ -1762,63 +1673,15 @@ combine_limits:
}
/*
- * Verify that all devices have an integrity profile that matches the
- * DM device's registered integrity profile. If the profiles don't
- * match then unregister the DM device's integrity profile.
+ * Check if a target requires flush support even if none of the underlying
+ * devices need it (e.g. to persist target-specific metadata).
*/
-static void dm_table_verify_integrity(struct dm_table *t)
-{
- struct gendisk *template_disk = NULL;
-
- if (t->integrity_added)
- return;
-
- if (t->integrity_supported) {
- /*
- * Verify that the original integrity profile
- * matches all the devices in this table.
- */
- template_disk = dm_table_get_integrity_disk(t);
- if (template_disk &&
- blk_integrity_compare(dm_disk(t->md), template_disk) >= 0)
- return;
- }
-
- if (integrity_profile_exists(dm_disk(t->md))) {
- DMWARN("%s: unable to establish an integrity profile",
- dm_device_name(t->md));
- blk_integrity_unregister(dm_disk(t->md));
- }
-}
-
-static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static bool dm_table_supports_flush(struct dm_table *t)
{
- unsigned long flush = (unsigned long) data;
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return (q->queue_flags & flush);
-}
-
-static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
-{
- /*
- * Require at least one underlying device to support flushes.
- * t->devices includes internal dm devices such as mirror logs
- * so we need to use iterate_devices here, which targets
- * supporting flushes must provide.
- */
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
- if (!ti->num_flush_bios)
- continue;
-
- if (ti->flush_supported)
- return true;
-
- if (ti->type->iterate_devices &&
- ti->type->iterate_devices(ti, device_flush_capable, (void *) flush))
+ if (ti->num_flush_bios && ti->flush_supported)
return true;
}
@@ -1839,20 +1702,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
return false;
}
-static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- return !bdev_nonrot(dev->bdev);
-}
-
-static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !blk_queue_add_random(q);
-}
-
static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
@@ -1877,12 +1726,6 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t)
return true;
}
-static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- return !bdev_nowait(dev->bdev);
-}
-
static bool dm_table_supports_nowait(struct dm_table *t)
{
for (unsigned int i = 0; i < t->num_targets; i++) {
@@ -1890,10 +1733,6 @@ static bool dm_table_supports_nowait(struct dm_table *t)
if (!dm_target_supports_nowait(ti->type))
return false;
-
- if (!ti->type->iterate_devices ||
- ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
- return false;
}
return true;
@@ -1950,29 +1789,25 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
return true;
}
-static int device_requires_stable_pages(struct dm_target *ti,
- struct dm_dev *dev, sector_t start,
- sector_t len, void *data)
-{
- return bdev_stable_writes(dev->bdev);
-}
-
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
- bool wc = false, fua = false;
int r;
- if (dm_table_supports_nowait(t))
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
+ if (!dm_table_supports_nowait(t))
+ limits->features &= ~BLK_FEAT_NOWAIT;
+
+ /*
+ * The current polling implementation does not support request-based
+ * stacking.
+ */
+ if (!__table_type_bio_based(t->type))
+ limits->features &= ~BLK_FEAT_POLL;
if (!dm_table_supports_discards(t)) {
limits->max_hw_discard_sectors = 0;
limits->discard_granularity = 0;
limits->discard_alignment = 0;
- limits->discard_misaligned = 0;
}
if (!dm_table_supports_write_zeroes(t))
@@ -1981,58 +1816,22 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_secure_erase(t))
limits->max_secure_erase_sectors = 0;
- if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
- wc = true;
- if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
- fua = true;
- }
- blk_queue_write_cache(q, wc, fua);
+ if (dm_table_supports_flush(t))
+ limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
if (dm_table_supports_dax(t, device_not_dax_capable)) {
- blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+ limits->features |= BLK_FEAT_DAX;
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
set_dax_synchronous(t->md->dax_dev);
} else
- blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+ limits->features &= ~BLK_FEAT_DAX;
if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
dax_write_cache(t->md->dax_dev, true);
- /* Ensure that all underlying devices are non-rotational. */
- if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-
- dm_table_verify_integrity(t);
-
- /*
- * Some devices don't use blk_integrity but still want stable pages
- * because they do their own checksumming.
- * If any underlying device requires stable pages, a table must require
- * them as well. Only targets that support iterate_devices are considered:
- * don't want error, zero, etc to require stable pages.
- */
- if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
-
- /*
- * Determine whether or not this queue's I/O timings contribute
- * to the entropy pool, Only request-based targets use this.
- * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
- * have it set.
- */
- if (blk_queue_add_random(q) &&
- dm_table_any_dev_attr(t, device_is_not_random, NULL))
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
-
- /*
- * For a zoned target, setup the zones related queue attributes
- * and resources necessary for zone append emulation if necessary.
- */
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
+ /* For a zoned table, setup the zone related queue attributes. */
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (limits->features & BLK_FEAT_ZONED)) {
r = dm_set_zones_restrictions(t, q, limits);
if (r)
return r;
@@ -2042,22 +1841,18 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (r)
return r;
- dm_update_crypto_profile(q, t);
-
/*
- * Check for request-based device is left to
- * dm_mq_init_request_queue()->blk_mq_init_allocated_queue().
- *
- * For bio-based device, only set QUEUE_FLAG_POLL when all
- * underlying devices supporting polling.
+ * Now that the limits are set, check the zones mapped by the table
+ * and setup the resources for zone append emulation if necessary.
*/
- if (__table_type_bio_based(t->type)) {
- if (dm_table_supports_poll(t))
- blk_queue_flag_set(QUEUE_FLAG_POLL, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (limits->features & BLK_FEAT_ZONED)) {
+ r = dm_revalidate_zones(t, q);
+ if (r)
+ return r;
}
+ dm_update_crypto_profile(q, t);
return 0;
}
diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c
index b423bec6458b..9d51f72a9d66 100644
--- a/drivers/md/dm-vdo/dm-vdo-target.c
+++ b/drivers/md/dm-vdo/dm-vdo-target.c
@@ -945,7 +945,7 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
* The value is used by dm-thin to determine whether to pass down discards. The block layer
* splits large discards on this boundary when this is set.
*/
- limits->max_discard_sectors =
+ limits->max_hw_discard_sectors =
(vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);
/*
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 5d66d916730e..c0d41c36e06e 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -13,8 +13,6 @@
#define DM_MSG_PREFIX "zone"
-#define DM_ZONE_INVALID_WP_OFST UINT_MAX
-
/*
* For internal zone reports bypassing the top BIO submission path.
*/
@@ -146,34 +144,27 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
}
/*
- * Count conventional zones of a mapped zoned device. If the device
- * only has conventional zones, do not expose it as zoned.
- */
-static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- unsigned int *nr_conv_zones = data;
-
- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
- (*nr_conv_zones)++;
-
- return 0;
-}
-
-/*
* Revalidate the zones of a mapped device to initialize the resources necessary
* for zone append emulation. Note that we cannot simply use the block layer
* blk_revalidate_disk_zones() function here as the mapped device is suspended
* (this is called from __bind() context).
*/
-static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
+int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
{
+ struct mapped_device *md = t->md;
struct gendisk *disk = md->disk;
int ret;
+ if (!get_capacity(disk))
+ return 0;
+
/* Revalidate only if something changed. */
- if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
+ if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
+ DMINFO("%s using %s zone append",
+ disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
md->nr_zones = 0;
+ }
if (md->nr_zones)
return 0;
@@ -220,13 +211,129 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
return true;
}
+struct dm_device_zone_count {
+ sector_t start;
+ sector_t len;
+ unsigned int total_nr_seq_zones;
+ unsigned int target_nr_seq_zones;
+};
+
+/*
+ * Count the total number of sequential zones and the number of mapped
+ * sequential zones of a target zoned device.
+ */
+static int dm_device_count_zones_cb(struct blk_zone *zone,
+ unsigned int idx, void *data)
+{
+ struct dm_device_zone_count *zc = data;
+
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+ zc->total_nr_seq_zones++;
+ if (zone->start >= zc->start &&
+ zone->start < zc->start + zc->len)
+ zc->target_nr_seq_zones++;
+ }
+
+ return 0;
+}
+
+static int dm_device_count_zones(struct dm_dev *dev,
+ struct dm_device_zone_count *zc)
+{
+ int ret;
+
+ ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
+ dm_device_count_zones_cb, zc);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -EIO;
+ return 0;
+}
+
+struct dm_zone_resource_limits {
+ unsigned int mapped_nr_seq_zones;
+ struct queue_limits *lim;
+ bool reliable_limits;
+};
+
+static int device_get_zone_resource_limits(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct dm_zone_resource_limits *zlim = data;
+ struct gendisk *disk = dev->bdev->bd_disk;
+ unsigned int max_open_zones, max_active_zones;
+ int ret;
+ struct dm_device_zone_count zc = {
+ .start = start,
+ .len = len,
+ };
+
+ /*
+ * If the target is not the whole device, the device zone resources may
+ * be shared between different targets. Check this by counting the
+ * number of mapped sequential zones: if this number is smaller than the
+ * total number of sequential zones of the target device, then resource
+ * sharing may happen and the zone limits will not be reliable.
+ */
+ ret = dm_device_count_zones(dev, &zc);
+ if (ret) {
+ DMERR("Count %s zones failed %d", disk->disk_name, ret);
+ return ret;
+ }
+
+ /*
+ * If the target does not map any sequential zones, then we do not need
+ * any zone resource limits.
+ */
+ if (!zc.target_nr_seq_zones)
+ return 0;
+
+ /*
+ * If the target does not map all sequential zones, the limits
+ * will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
+ */
+ if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
+ zlim->reliable_limits = false;
+ ti->zone_reset_all_supported = false;
+ }
+
+ /*
+ * If the target maps fewer sequential zones than the limit values, then
+ * we do not have limits for this target.
+ */
+ max_active_zones = disk->queue->limits.max_active_zones;
+ if (max_active_zones >= zc.target_nr_seq_zones)
+ max_active_zones = 0;
+ zlim->lim->max_active_zones =
+ min_not_zero(max_active_zones, zlim->lim->max_active_zones);
+
+ max_open_zones = disk->queue->limits.max_open_zones;
+ if (max_open_zones >= zc.target_nr_seq_zones)
+ max_open_zones = 0;
+ zlim->lim->max_open_zones =
+ min_not_zero(max_open_zones, zlim->lim->max_open_zones);
+
+ /*
+ * Also count the total number of sequential zones for the mapped
+ * device so that when we are done inspecting all its targets, we are
+ * able to check if the mapped device actually has any sequential zones.
+ */
+ zlim->mapped_nr_seq_zones += zc.target_nr_seq_zones;
+
+ return 0;
+}
+
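To make the limit handling concrete: if a device advertises max_open_zones = 128 but the target maps only 64 of its sequential zones, the target alone can never exhaust the limit, so it is dropped (set to 0) for this target. Conversely, whenever a target maps fewer sequential zones than the device exposes in total, another mapping may be consuming the shared zone resources, so reliable_limits is cleared and REQ_OP_ZONE_RESET_ALL is disabled for the target.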
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *lim)
{
struct mapped_device *md = t->md;
struct gendisk *disk = md->disk;
- unsigned int nr_conv_zones = 0;
- int ret;
+ struct dm_zone_resource_limits zlim = {
+ .reliable_limits = true,
+ .lim = lim,
+ };
/*
* Check if zone append is natively supported, and if not, set the
@@ -240,46 +347,63 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
lim->max_zone_append_sectors = 0;
}
- if (!get_capacity(md->disk))
- return 0;
-
/*
- * Count conventional zones to check that the mapped device will indeed
- * have sequential write required zones.
+ * Determine the max open and max active zone limits for the mapped
+ * device by inspecting the zone resource limits and the zones mapped
+ * by each target.
*/
- md->zone_revalidate_map = t;
- ret = dm_blk_report_zones(disk, 0, UINT_MAX,
- dm_check_zoned_cb, &nr_conv_zones);
- md->zone_revalidate_map = NULL;
- if (ret < 0) {
- DMERR("Check zoned failed %d", ret);
- return ret;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ /*
+ * Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
+ * device_get_zone_resource_limits() may adjust this if one of
+ * the device used by the target does not have all its
+ * sequential write required zones mapped.
+ */
+ ti->zone_reset_all_supported = true;
+
+ if (!ti->type->iterate_devices ||
+ ti->type->iterate_devices(ti,
+ device_get_zone_resource_limits, &zlim)) {
+ DMERR("Could not determine %s zone resource limits",
+ disk->disk_name);
+ return -ENODEV;
+ }
}
/*
- * If we only have conventional zones, expose the mapped device as
- * a regular device.
+ * If we only have conventional zones mapped, expose the mapped device
+ * as a regular device.
*/
- if (nr_conv_zones >= ret) {
+ if (!zlim.mapped_nr_seq_zones) {
lim->max_open_zones = 0;
lim->max_active_zones = 0;
- lim->zoned = false;
+ lim->max_zone_append_sectors = 0;
+ lim->zone_write_granularity = 0;
+ lim->chunk_sectors = 0;
+ lim->features &= ~BLK_FEAT_ZONED;
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ md->nr_zones = 0;
disk->nr_zones = 0;
return 0;
}
- if (!md->disk->nr_zones) {
- DMINFO("%s using %s zone append",
- md->disk->disk_name,
- queue_emulates_zone_append(q) ? "emulated" : "native");
- }
-
- ret = dm_revalidate_zones(md, t);
- if (ret < 0)
- return ret;
+ /*
+ * Warn once (when the capacity is not yet set) if the mapped device is
+ * partially using zone resources of the target devices as that leads to
+ * unreliable limits, i.e. if another mapped device uses the same
+ * underlying devices, we cannot enforce zone limits to guarantee that
+ * writing will not lead to errors. Note that we really should return
+ * an error in such a case but there is no easy way to find out if
+ * another mapped device uses the same underlying zoned devices.
+ */
+ if (!get_capacity(disk) && !zlim.reliable_limits)
+ DMWARN("%s zone resource limits may be unreliable",
+ disk->disk_name);
- if (!static_key_enabled(&zoned_enabled.key))
+ if (lim->features & BLK_FEAT_ZONED &&
+ !static_key_enabled(&zoned_enabled.key))
static_branch_enable(&zoned_enabled);
return 0;
}
@@ -306,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
return;
}
+
+static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ /*
+ * For an all-zones reset, ignore conventional, empty, read-only
+ * and offline zones.
+ */
+ switch (zone->cond) {
+ case BLK_ZONE_COND_NOT_WP:
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_READONLY:
+ case BLK_ZONE_COND_OFFLINE:
+ return 0;
+ default:
+ set_bit(idx, (unsigned long *)data);
+ return 0;
+ }
+}
+
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset)
+{
+ int ret;
+
+ ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
+ dm_zone_need_reset_cb, need_reset);
+ if (ret != nr_zones) {
+ DMERR("Get %s zone reset bitmap failed\n",
+ md->disk->disk_name);
+ return -EIO;
+ }
+
+ return 0;
+}
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 12236e6f46f3..cd0ee144973f 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1009,7 +1009,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->max_sectors = chunk_sectors;
/* We are exposing a drive-managed zoned block device */
- limits->zoned = false;
+ limits->features &= ~BLK_FEAT_ZONED;
}
/*
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 13037d6a6f62..4b1b69e576a5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1188,7 +1188,7 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
return len;
return min_t(sector_t, len,
min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
- blk_chunk_sectors_left(target_offset, max_granularity)));
+ blk_boundary_sectors_left(target_offset, max_granularity)));
}
static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
@@ -1598,20 +1598,19 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
static bool is_abnormal_io(struct bio *bio)
{
- enum req_op op = bio_op(bio);
-
- if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
- switch (op) {
- case REQ_OP_DISCARD:
- case REQ_OP_SECURE_ERASE:
- case REQ_OP_WRITE_ZEROES:
- return true;
- default:
- break;
- }
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ case REQ_OP_FLUSH:
+ return false;
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_ZONE_RESET_ALL:
+ return true;
+ default:
+ return false;
}
-
- return false;
}
static blk_status_t __process_abnormal_io(struct clone_info *ci,
@@ -1776,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
}
+
+static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ struct bio_list blist = BIO_EMPTY_LIST;
+ struct mapped_device *md = ci->io->md;
+ unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors;
+ unsigned long *need_reset;
+ unsigned int i, nr_zones, nr_reset;
+ unsigned int num_bios = 0;
+ blk_status_t sts = BLK_STS_OK;
+ sector_t sector = ti->begin;
+ struct bio *clone;
+ int ret;
+
+ nr_zones = ti->len >> ilog2(zone_sectors);
+ need_reset = bitmap_zalloc(nr_zones, GFP_NOIO);
+ if (!need_reset)
+ return BLK_STS_RESOURCE;
+
+ ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin,
+ nr_zones, need_reset);
+ if (ret) {
+ sts = BLK_STS_IOERR;
+ goto free_bitmap;
+ }
+
+ /* If we have no zones to reset, we are done. */
+ nr_reset = bitmap_weight(need_reset, nr_zones);
+ if (!nr_reset)
+ goto free_bitmap;
+
+ atomic_add(nr_zones, &ci->io->io_count);
+
+ for (i = 0; i < nr_zones; i++) {
+
+ if (!test_bit(i, need_reset)) {
+ sector += zone_sectors;
+ continue;
+ }
+
+ if (bio_list_empty(&blist)) {
+ /* This may take a while, so be nice to others */
+ if (num_bios)
+ cond_resched();
+
+ /*
+ * We may need to reset thousands of zones, so let's
+ * not go crazy with the clone allocation.
+ */
+ alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
+ NULL, GFP_NOIO);
+ }
+
+ /* Get a clone and change it to a regular reset operation. */
+ clone = bio_list_pop(&blist);
+ clone->bi_opf &= ~REQ_OP_MASK;
+ clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
+ clone->bi_iter.bi_sector = sector;
+ clone->bi_iter.bi_size = 0;
+ __map_bio(clone);
+
+ sector += zone_sectors;
+ num_bios++;
+ nr_reset--;
+ }
+
+ WARN_ON_ONCE(!bio_list_empty(&blist));
+ atomic_sub(nr_zones - num_bios, &ci->io->io_count);
+ ci->sector_count = 0;
+
+free_bitmap:
+ bitmap_free(need_reset);
+
+ return sts;
+}
+
+static void __send_zone_reset_all_native(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ unsigned int bios;
+
+ atomic_add(1, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+ atomic_sub(1 - bios, &ci->io->io_count);
+
+ ci->sector_count = 0;
+}
+
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ struct dm_table *t = ci->map;
+ blk_status_t sts = BLK_STS_OK;
+
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (ti->zone_reset_all_supported) {
+ __send_zone_reset_all_native(ci, ti);
+ continue;
+ }
+
+ sts = __send_zone_reset_all_emulated(ci, ti);
+ if (sts != BLK_STS_OK)
+ break;
+ }
+
+ /* Release the reference that alloc_io() took for submission. */
+ atomic_sub(1, &ci->io->io_count);
+
+ return sts;
+}
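
__send_zone_reset_all_emulated() pre-charges io->io_count with one
reference per zone and hands back the unused ones once it knows how
many clones were actually issued. The pattern, reduced to a fragment
(submit_needed_ios() and the locals are hypothetical placeholders):

atomic_add(max_ios, &io->io_count);		/* worst case, charged up front */
issued = submit_needed_ios(io);			/* hypothetical: bios actually sent */
atomic_sub(max_ios - issued, &io->io_count);	/* return the surplus references */
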
+
#else
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
struct bio *bio)
@@ -1786,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return false;
}
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ return BLK_STS_NOTSUPP;
+}
#endif
/*
@@ -1799,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md,
blk_status_t error = BLK_STS_OK;
bool is_abnormal, need_split;
- need_split = is_abnormal = is_abnormal_io(bio);
- if (static_branch_unlikely(&zoned_enabled))
- need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
+ is_abnormal = is_abnormal_io(bio);
+ if (static_branch_unlikely(&zoned_enabled)) {
+ /* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
+ need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
+ (is_abnormal || dm_zone_bio_needs_split(md, bio));
+ } else {
+ need_split = is_abnormal;
+ }
if (unlikely(need_split)) {
/*
@@ -1842,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md,
goto out;
}
+ if (static_branch_unlikely(&zoned_enabled) &&
+ (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
+ error = __send_zone_reset_all(&ci);
+ goto out;
+ }
+
error = __split_and_process_bio(&ci);
if (error || !ci.sector_count)
goto out;
@@ -2386,22 +2513,15 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
struct table_device *td;
int r;
- switch (type) {
- case DM_TYPE_REQUEST_BASED:
+ WARN_ON_ONCE(type == DM_TYPE_NONE);
+
+ if (type == DM_TYPE_REQUEST_BASED) {
md->disk->fops = &dm_rq_blk_dops;
r = dm_mq_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based dm mapped device");
return r;
}
- break;
- case DM_TYPE_BIO_BASED:
- case DM_TYPE_DAX_BIO_BASED:
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue);
- break;
- case DM_TYPE_NONE:
- WARN_ON_ONCE(true);
- break;
}
r = dm_calculate_queue_limits(t, &limits);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 53ef8207fe2c..cc466ad5cb1d 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -103,12 +103,16 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
*/
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *lim);
+int dm_revalidate_zones(struct dm_table *t, struct request_queue *q);
void dm_zone_endio(struct dm_io *io, struct bio *clone);
#ifdef CONFIG_BLK_DEV_ZONED
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset);
#else
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 0a2d37eb38ef..08232d8dc815 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -227,6 +227,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
+ unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
+ PAGE_SHIFT;
loff_t sboff, offset = mddev->bitmap_info.offset;
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
unsigned int size = PAGE_SIZE;
@@ -269,11 +271,9 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
if (size == 0)
/* bitmap runs in to data */
return -EINVAL;
- } else {
- /* DATA METADATA BITMAP - no problems */
}
- md_super_write(mddev, rdev, sboff + ps, (int) size, page);
+ md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page);
return 0;
}
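
A worked example of the new clamp, assuming 4 KiB pages (PAGE_SHIFT = 12):
with file_pages = 3 and pg_index = 2, bitmap_limit = (3 - 2) << 12 = 4096
bytes, so min(size, bitmap_limit) keeps the write inside the one bitmap
page that actually remains instead of letting a full PAGE_SIZE write run
past the end of the bitmap area.
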
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 8e36a0feec09..139fe2019c1d 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1570,7 +1570,7 @@ out:
return err;
}
-static struct md_cluster_operations cluster_ops = {
+static const struct md_cluster_operations cluster_ops = {
.join = join,
.leave = leave,
.slot_number = slot_number,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aff9118ff697..64693913ed18 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -69,13 +69,23 @@
#include "md-bitmap.h"
#include "md-cluster.h"
+static const char *action_name[NR_SYNC_ACTIONS] = {
+ [ACTION_RESYNC] = "resync",
+ [ACTION_RECOVER] = "recover",
+ [ACTION_CHECK] = "check",
+ [ACTION_REPAIR] = "repair",
+ [ACTION_RESHAPE] = "reshape",
+ [ACTION_FROZEN] = "frozen",
+ [ACTION_IDLE] = "idle",
+};
+
/* pers_list is a list of registered personalities protected by pers_lock. */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
static const struct kobj_type md_ktype;
-struct md_cluster_operations *md_cluster_ops;
+const struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;
@@ -479,7 +489,6 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
*/
WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
- del_timer_sync(&mddev->safemode_timer);
/* restrict memory reclaim I/O during raid array is suspend */
mddev->noio_flag = memalloc_noio_save();
@@ -550,13 +559,9 @@ static void md_end_flush(struct bio *bio)
rdev_dec_pending(rdev, mddev);
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pair is percpu_ref_get() from md_flush_request() */
- percpu_ref_put(&mddev->active_io);
-
+ if (atomic_dec_and_test(&mddev->flush_pending))
/* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work);
- }
}
static void md_submit_flush_data(struct work_struct *ws);
@@ -587,12 +592,8 @@ static void submit_flushes(struct work_struct *ws)
rcu_read_lock();
}
rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pair is percpu_ref_get() from md_flush_request() */
- percpu_ref_put(&mddev->active_io);
-
+ if (atomic_dec_and_test(&mddev->flush_pending))
queue_work(md_wq, &mddev->flush_work);
- }
}
static void md_submit_flush_data(struct work_struct *ws)
@@ -617,8 +618,20 @@ static void md_submit_flush_data(struct work_struct *ws)
bio_endio(bio);
} else {
bio->bi_opf &= ~REQ_PREFLUSH;
- md_handle_request(mddev, bio);
+
+ /*
+ * make_request() will never return an error here; the only
+ * implementation that can fail is raid5_make_request(), and
+ * only when called through dm-raid. Since dm always splits
+ * data and flush operations into two separate bios, the io
+ * size of a flush submitted by dm is always 0, so
+ * make_request() will not fail here.
+ */
+ if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
+ bio_io_error(bio);
}
+
+ /* The pair is percpu_ref_get() from md_flush_request() */
+ percpu_ref_put(&mddev->active_io);
}
/*
@@ -654,24 +667,22 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
WARN_ON(percpu_ref_is_zero(&mddev->active_io));
percpu_ref_get(&mddev->active_io);
mddev->flush_bio = bio;
- bio = NULL;
- }
- spin_unlock_irq(&mddev->lock);
-
- if (!bio) {
+ spin_unlock_irq(&mddev->lock);
INIT_WORK(&mddev->flush_work, submit_flushes);
queue_work(md_wq, &mddev->flush_work);
- } else {
- /* flush was performed for some other bio while we waited. */
- if (bio->bi_iter.bi_size == 0)
- /* an empty barrier - all done */
- bio_endio(bio);
- else {
- bio->bi_opf &= ~REQ_PREFLUSH;
- return false;
- }
+ return true;
}
- return true;
+
+ /* flush was performed for some other bio while we waited. */
+ spin_unlock_irq(&mddev->lock);
+ if (bio->bi_iter.bi_size == 0) {
+ /* pure flush without data - all done */
+ bio_endio(bio);
+ return true;
+ }
+
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ return false;
}
EXPORT_SYMBOL(md_flush_request);
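
The rework keeps md_flush_request()'s caller contract intact; a sketch
of that contract as the personalities use it (compare the raid10 hunk
later in this diff; example_make_request() is a hypothetical caller):

static bool example_make_request(struct mddev *mddev, struct bio *bio)
{
	/* true: bio was consumed (flush queued, or pure flush completed) */
	if ((bio->bi_opf & REQ_PREFLUSH) && md_flush_request(mddev, bio))
		return true;

	/* false: REQ_PREFLUSH was stripped, process the data part here */
	return true;
}
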
@@ -742,7 +753,6 @@ int mddev_init(struct mddev *mddev)
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
- mutex_init(&mddev->sync_mutex);
mutex_init(&mddev->suspend_mutex);
mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks);
@@ -758,7 +768,7 @@ int mddev_init(struct mddev *mddev)
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
mddev->reshape_backwards = 0;
- mddev->last_sync_action = "none";
+ mddev->last_sync_action = ACTION_IDLE;
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
@@ -2410,36 +2420,10 @@ static LIST_HEAD(pending_raid_disks);
*/
int md_integrity_register(struct mddev *mddev)
{
- struct md_rdev *rdev, *reference = NULL;
-
if (list_empty(&mddev->disks))
return 0; /* nothing to do */
- if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
- return 0; /* shouldn't register, or already is */
- rdev_for_each(rdev, mddev) {
- /* skip spares and non-functional disks */
- if (test_bit(Faulty, &rdev->flags))
- continue;
- if (rdev->raid_disk < 0)
- continue;
- if (!reference) {
- /* Use the first rdev as the reference */
- reference = rdev;
- continue;
- }
- /* does this rdev's profile match the reference profile? */
- if (blk_integrity_compare(reference->bdev->bd_disk,
- rdev->bdev->bd_disk) < 0)
- return -EINVAL;
- }
- if (!reference || !bdev_get_integrity(reference->bdev))
- return 0;
- /*
- * All component devices are integrity capable and have matching
- * profiles, register the common profile for the md device.
- */
- blk_integrity_register(mddev->gendisk,
- bdev_get_integrity(reference->bdev));
+ if (mddev_is_dm(mddev) || !blk_get_integrity(mddev->gendisk))
+ return 0; /* shouldn't register */
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
@@ -2459,32 +2443,6 @@ int md_integrity_register(struct mddev *mddev)
}
EXPORT_SYMBOL(md_integrity_register);
-/*
- * Attempt to add an rdev, but only if it is consistent with the current
- * integrity profile
- */
-int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
-{
- struct blk_integrity *bi_mddev;
-
- if (mddev_is_dm(mddev))
- return 0;
-
- bi_mddev = blk_get_integrity(mddev->gendisk);
-
- if (!bi_mddev) /* nothing to do */
- return 0;
-
- if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
- pr_err("%s: incompatible integrity profile for %pg\n",
- mdname(mddev), rdev->bdev);
- return -ENXIO;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(md_integrity_add_rdev);
-
static bool rdev_read_only(struct md_rdev *rdev)
{
return bdev_read_only(rdev->bdev) ||
@@ -4867,30 +4825,81 @@ out_unlock:
static struct md_sysfs_entry md_metadata =
__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
-static ssize_t
-action_show(struct mddev *mddev, char *page)
+enum sync_action md_sync_action(struct mddev *mddev)
{
- char *type = "idle";
unsigned long recovery = mddev->recovery;
+
+ /*
+ * frozen has the highest priority: a running sync_thread will be
+ * stopped immediately, and no new sync_thread can start.
+ */
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
- type = "frozen";
- else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
- if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
- type = "reshape";
- else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
- if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
- type = "resync";
- else if (test_bit(MD_RECOVERY_CHECK, &recovery))
- type = "check";
- else
- type = "repair";
- } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
- type = "recover";
- else if (mddev->reshape_position != MaxSector)
- type = "reshape";
+ return ACTION_FROZEN;
+
+ /*
+ * a read-only array can't register a sync_thread; it can only
+ * add/remove spares.
+ */
+ if (!md_is_rdwr(mddev))
+ return ACTION_IDLE;
+
+ /*
+ * idle means no sync_thread is running, and no new sync_thread is
+ * requested.
+ */
+ if (!test_bit(MD_RECOVERY_RUNNING, &recovery) &&
+ !test_bit(MD_RECOVERY_NEEDED, &recovery))
+ return ACTION_IDLE;
+
+ if (test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+ mddev->reshape_position != MaxSector)
+ return ACTION_RESHAPE;
+
+ if (test_bit(MD_RECOVERY_RECOVER, &recovery))
+ return ACTION_RECOVER;
+
+ if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
+ /*
+ * MD_RECOVERY_CHECK must be paired with
+ * MD_RECOVERY_REQUESTED.
+ */
+ if (test_bit(MD_RECOVERY_CHECK, &recovery))
+ return ACTION_CHECK;
+ if (test_bit(MD_RECOVERY_REQUESTED, &recovery))
+ return ACTION_REPAIR;
+ return ACTION_RESYNC;
}
- return sprintf(page, "%s\n", type);
+
+ /*
+ * MD_RECOVERY_NEEDED or MD_RECOVERY_RUNNING is set, but no
+ * sync_action is specified.
+ */
+ return ACTION_IDLE;
+}
+
+enum sync_action md_sync_action_by_name(const char *page)
+{
+ enum sync_action action;
+
+ for (action = 0; action < NR_SYNC_ACTIONS; ++action) {
+ if (cmd_match(page, action_name[action]))
+ return action;
+ }
+
+ return NR_SYNC_ACTIONS;
+}
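
A small usage sketch, assuming cmd_match()'s usual tolerance of the
trailing newline that sysfs writes carry; NR_SYNC_ACTIONS doubles as
the not-found sentinel (fragment from a hypothetical store handler):

enum sync_action action = md_sync_action_by_name("check\n");

if (action == NR_SYNC_ACTIONS)
	return -EINVAL;		/* unknown action name */
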
+
+const char *md_sync_action_name(enum sync_action action)
+{
+ return action_name[action];
+}
+
+static ssize_t
+action_show(struct mddev *mddev, char *page)
+{
+ enum sync_action action = md_sync_action(mddev);
+
+ return sprintf(page, "%s\n", md_sync_action_name(action));
}
/**
@@ -4899,15 +4908,10 @@ action_show(struct mddev *mddev, char *page)
* @locked: if set, reconfig_mutex will still be held after this function
* return; if not set, reconfig_mutex will be released after this
* function return.
- * @check_seq: if set, only wait for curent running sync_thread to stop, noted
- * that new sync_thread can still start.
*/
-static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
+static void stop_sync_thread(struct mddev *mddev, bool locked)
{
- int sync_seq;
-
- if (check_seq)
- sync_seq = atomic_read(&mddev->sync_seq);
+ int sync_seq = atomic_read(&mddev->sync_seq);
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
if (!locked)
@@ -4928,7 +4932,8 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
wait_event(resync_wait,
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- (check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
+ (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery) &&
+ sync_seq != atomic_read(&mddev->sync_seq)));
if (locked)
mddev_lock_nointr(mddev);
@@ -4939,7 +4944,7 @@ void md_idle_sync_thread(struct mddev *mddev)
lockdep_assert_held(&mddev->reconfig_mutex);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev, true, true);
+ stop_sync_thread(mddev, true);
}
EXPORT_SYMBOL_GPL(md_idle_sync_thread);
@@ -4948,7 +4953,7 @@ void md_frozen_sync_thread(struct mddev *mddev)
lockdep_assert_held(&mddev->reconfig_mutex);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev, true, false);
+ stop_sync_thread(mddev, true);
}
EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
@@ -4963,100 +4968,127 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
-static void idle_sync_thread(struct mddev *mddev)
+static int mddev_start_reshape(struct mddev *mddev)
{
- mutex_lock(&mddev->sync_mutex);
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
- if (mddev_lock(mddev)) {
- mutex_unlock(&mddev->sync_mutex);
- return;
- }
-
- stop_sync_thread(mddev, false, true);
- mutex_unlock(&mddev->sync_mutex);
-}
+ int ret;
-static void frozen_sync_thread(struct mddev *mddev)
-{
- mutex_lock(&mddev->sync_mutex);
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (mddev->pers->start_reshape == NULL)
+ return -EINVAL;
- if (mddev_lock(mddev)) {
- mutex_unlock(&mddev->sync_mutex);
- return;
+ if (mddev->reshape_position == MaxSector ||
+ mddev->pers->check_reshape == NULL ||
+ mddev->pers->check_reshape(mddev)) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ ret = mddev->pers->start_reshape(mddev);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * If reshape is still in progress, and md_check_recovery() can
+ * continue the reshape, don't restart the reshape, because
+ * restarting can corrupt data on raid456.
+ */
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- stop_sync_thread(mddev, false, false);
- mutex_unlock(&mddev->sync_mutex);
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
+ return 0;
}
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
+ int ret;
+ enum sync_action action;
+
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
+retry:
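+ /*
+ * sync_work may be re-queued while we sleep in flush_work() below,
+ * so the busy check is repeated under reconfig_mutex and the whole
+ * sequence restarts if it lost that race.
+ */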
+ if (work_busy(&mddev->sync_work))
+ flush_work(&mddev->sync_work);
- if (cmd_match(page, "idle"))
- idle_sync_thread(mddev);
- else if (cmd_match(page, "frozen"))
- frozen_sync_thread(mddev);
- else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- return -EBUSY;
- else if (cmd_match(page, "resync"))
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- else if (cmd_match(page, "recover")) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- } else if (cmd_match(page, "reshape")) {
- int err;
- if (mddev->pers->start_reshape == NULL)
- return -EINVAL;
- err = mddev_lock(mddev);
- if (!err) {
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- err = -EBUSY;
- } else if (mddev->reshape_position == MaxSector ||
- mddev->pers->check_reshape == NULL ||
- mddev->pers->check_reshape(mddev)) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- err = mddev->pers->start_reshape(mddev);
- } else {
- /*
- * If reshape is still in progress, and
- * md_check_recovery() can continue to reshape,
- * don't restart reshape because data can be
- * corrupted for raid456.
- */
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- }
- mddev_unlock(mddev);
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ if (work_busy(&mddev->sync_work)) {
+ mddev_unlock(mddev);
+ goto retry;
+ }
+
+ action = md_sync_action_by_name(page);
+
+ /* TODO: mdadm relies on "idle" to start the sync_thread. */
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ switch (action) {
+ case ACTION_FROZEN:
+ md_frozen_sync_thread(mddev);
+ ret = len;
+ goto out;
+ case ACTION_IDLE:
+ md_idle_sync_thread(mddev);
+ break;
+ case ACTION_RESHAPE:
+ case ACTION_RECOVER:
+ case ACTION_CHECK:
+ case ACTION_REPAIR:
+ case ACTION_RESYNC:
+ ret = -EBUSY;
+ goto out;
+ default:
+ ret = -EINVAL;
+ goto out;
}
- if (err)
- return err;
- sysfs_notify_dirent_safe(mddev->sysfs_degraded);
} else {
- if (cmd_match(page, "check"))
+ switch (action) {
+ case ACTION_FROZEN:
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ ret = len;
+ goto out;
+ case ACTION_RESHAPE:
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ ret = mddev_start_reshape(mddev);
+ if (ret)
+ goto out;
+ break;
+ case ACTION_RECOVER:
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ break;
+ case ACTION_CHECK:
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
- else if (!cmd_match(page, "repair"))
- return -EINVAL;
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ fallthrough;
+ case ACTION_REPAIR:
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ fallthrough;
+ case ACTION_RESYNC:
+ case ACTION_IDLE:
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
}
+
if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}
+
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
- return len;
+ ret = len;
+
+out:
+ mddev_unlock(mddev);
+ return ret;
}
static struct md_sysfs_entry md_scan_mode =
@@ -5065,7 +5097,8 @@ __ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
static ssize_t
last_sync_action_show(struct mddev *mddev, char *page)
{
- return sprintf(page, "%s\n", mddev->last_sync_action);
+ return sprintf(page, "%s\n",
+ md_sync_action_name(mddev->last_sync_action));
}
static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
@@ -5755,14 +5788,20 @@ static const struct kobj_type md_ktype = {
int mdp_major = 0;
/* stack the limit for all rdevs into lim */
-void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
+int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
+ unsigned int flags)
{
struct md_rdev *rdev;
rdev_for_each(rdev, mddev) {
queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
+ if ((flags & MDDEV_STACK_INTEGRITY) &&
+ !queue_limits_stack_integrity_bdev(lim, rdev->bdev))
+ return -EINVAL;
}
+
+ return 0;
}
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
@@ -5777,6 +5816,14 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
lim = queue_limits_start_update(mddev->gendisk->queue);
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
+
+ if (!queue_limits_stack_integrity_bdev(&lim, rdev->bdev)) {
+ pr_err("%s: incompatible integrity profile for %pg\n",
+ mdname(mddev), rdev->bdev);
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return -ENXIO;
+ }
+
return queue_limits_commit_update(mddev->gendisk->queue, &lim);
}
EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);
@@ -5806,6 +5853,14 @@ static void mddev_delayed_delete(struct work_struct *ws)
kobject_put(&mddev->kobj);
}
+void md_init_stacking_limits(struct queue_limits *lim)
+{
+ blk_set_stacking_limits(lim);
+ lim->features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
+}
+EXPORT_SYMBOL_GPL(md_init_stacking_limits);
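
A typical consumer, sketched from the raid0/raid1/raid10 call sites
updated later in this diff (error handling condensed):

struct queue_limits lim;
int err;

md_init_stacking_limits(&lim);		/* stacking defaults + md feature bits */
lim.io_min = mddev->chunk_sectors << 9;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
	return err;			/* e.g. incompatible integrity profile */
return queue_limits_set(mddev->gendisk->queue, &lim);
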
+
struct mddev *md_alloc(dev_t dev, char *name)
{
/*
@@ -5823,7 +5878,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
int partitioned;
int shift;
int unit;
- int error ;
+ int error;
/*
* Wait for any previous instance of this device to be completely
@@ -5881,7 +5936,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
disk->fops = &md_fops;
disk->private_data = mddev;
- blk_queue_write_cache(disk->queue, true, true);
disk->events |= DISK_EVENT_MEDIA_CHANGE;
mddev->gendisk = disk;
error = add_disk(disk);
@@ -6185,28 +6239,6 @@ int md_run(struct mddev *mddev)
}
}
- if (!mddev_is_dm(mddev)) {
- struct request_queue *q = mddev->gendisk->queue;
- bool nonrot = true;
-
- rdev_for_each(rdev, mddev) {
- if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
- nonrot = false;
- break;
- }
- }
- if (mddev->degraded)
- nonrot = false;
- if (nonrot)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
-
- /* Set the NOWAIT flags if all underlying devices support it */
- if (nowait)
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
- }
if (pers->sync_request) {
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_redundancy_group))
@@ -6437,7 +6469,7 @@ void md_stop_writes(struct mddev *mddev)
{
mddev_lock_nointr(mddev);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev, true, false);
+ stop_sync_thread(mddev, true);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
@@ -6505,7 +6537,7 @@ static int md_set_readonly(struct mddev *mddev)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- stop_sync_thread(mddev, false, false);
+ stop_sync_thread(mddev, false);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
@@ -6551,7 +6583,7 @@ static int do_md_stop(struct mddev *mddev, int mode)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- stop_sync_thread(mddev, true, false);
+ stop_sync_thread(mddev, true);
if (mddev->sysfs_active ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
@@ -7166,15 +7198,6 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
if (!mddev->thread)
md_update_sb(mddev, 1);
/*
- * If the new disk does not support REQ_NOWAIT,
- * disable on the whole MD.
- */
- if (!bdev_nowait(rdev->bdev)) {
- pr_info("%s: Disabling nowait because %pg does not support nowait\n",
- mdname(mddev), rdev->bdev);
- blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
- }
- /*
* Kick recovery, maybe this spare has to be added to the
* array immediately.
*/
@@ -7742,12 +7765,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
return get_bitmap_file(mddev, argp);
}
- if (cmd == HOT_REMOVE_DISK)
- /* need to ensure recovery thread has run */
- wait_event_interruptible_timeout(mddev->sb_wait,
- !test_bit(MD_RECOVERY_NEEDED,
- &mddev->recovery),
- msecs_to_jiffies(5000));
if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
/* Need to flush page cache, and ensure no-one else opens
* and writes
@@ -8520,7 +8537,7 @@ int unregister_md_personality(struct md_personality *p)
}
EXPORT_SYMBOL(unregister_md_personality);
-int register_md_cluster_operations(struct md_cluster_operations *ops,
+int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module)
{
int ret = 0;
@@ -8641,12 +8658,12 @@ EXPORT_SYMBOL(md_done_sync);
* A return value of 'false' means that the write wasn't recorded
* and cannot proceed as the array is being suspended.
*/
-bool md_write_start(struct mddev *mddev, struct bio *bi)
+void md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
if (bio_data_dir(bi) != WRITE)
- return true;
+ return;
BUG_ON(mddev->ro == MD_RDONLY);
if (mddev->ro == MD_AUTO_READ) {
@@ -8679,15 +8696,9 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
if (!mddev->has_superblocks)
- return true;
+ return;
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
- is_md_suspended(mddev));
- if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
- percpu_ref_put(&mddev->writes_pending);
- return false;
- }
- return true;
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
}
EXPORT_SYMBOL(md_write_start);
@@ -8835,6 +8846,77 @@ void md_allow_write(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_allow_write);
+static sector_t md_sync_max_sectors(struct mddev *mddev,
+ enum sync_action action)
+{
+ switch (action) {
+ case ACTION_RESYNC:
+ case ACTION_CHECK:
+ case ACTION_REPAIR:
+ atomic64_set(&mddev->resync_mismatches, 0);
+ fallthrough;
+ case ACTION_RESHAPE:
+ return mddev->resync_max_sectors;
+ case ACTION_RECOVER:
+ return mddev->dev_sectors;
+ default:
+ return 0;
+ }
+}
+
+static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
+{
+ sector_t start = 0;
+ struct md_rdev *rdev;
+
+ switch (action) {
+ case ACTION_CHECK:
+ case ACTION_REPAIR:
+ return mddev->resync_min;
+ case ACTION_RESYNC:
+ if (!mddev->bitmap)
+ return mddev->recovery_cp;
+ return 0;
+ case ACTION_RESHAPE:
+ /*
+ * If the original node aborts reshaping then we continue the
+ * reshaping, so set the position again to avoid restarting the
+ * reshape from the very beginning.
+ */
+ if (mddev_is_clustered(mddev) &&
+ mddev->reshape_position != MaxSector)
+ return mddev->reshape_position;
+ return 0;
+ case ACTION_RECOVER:
+ start = MaxSector;
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Journal, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags) &&
+ !test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < start)
+ start = rdev->recovery_offset;
+ rcu_read_unlock();
+
+ /* If there is a bitmap, we need to make sure all
+ * writes that started before we added a spare
+ * complete before we start doing a recovery.
+ * Otherwise the write might complete and (via
+ * bitmap_endwrite) set a bit in the bitmap after the
+ * recovery has checked that bit and skipped that
+ * region.
+ */
+ if (mddev->bitmap) {
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+ }
+ return start;
+ default:
+ return MaxSector;
+ }
+}
+
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
#define UPDATE_FREQUENCY (5*60*HZ)
@@ -8851,7 +8933,8 @@ void md_do_sync(struct md_thread *thread)
sector_t last_check;
int skipped = 0;
struct md_rdev *rdev;
- char *desc, *action = NULL;
+ enum sync_action action;
+ const char *desc;
struct blk_plug plug;
int ret;
@@ -8882,21 +8965,9 @@ void md_do_sync(struct md_thread *thread)
goto skip;
}
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
- desc = "data-check";
- action = "check";
- } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
- desc = "requested-resync";
- action = "repair";
- } else
- desc = "resync";
- } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- desc = "reshape";
- else
- desc = "recovery";
-
- mddev->last_sync_action = action ?: desc;
+ action = md_sync_action(mddev);
+ desc = md_sync_action_name(action);
+ mddev->last_sync_action = action;
/*
* Before starting a resync we must have set curr_resync to
@@ -8964,56 +9035,8 @@ void md_do_sync(struct md_thread *thread)
spin_unlock(&all_mddevs_lock);
} while (mddev->curr_resync < MD_RESYNC_DELAYED);
- j = 0;
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- /* resync follows the size requested by the personality,
- * which defaults to physical size, but can be virtual size
- */
- max_sectors = mddev->resync_max_sectors;
- atomic64_set(&mddev->resync_mismatches, 0);
- /* we don't use the checkpoint if there's a bitmap */
- if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
- j = mddev->resync_min;
- else if (!mddev->bitmap)
- j = mddev->recovery_cp;
-
- } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
- max_sectors = mddev->resync_max_sectors;
- /*
- * If the original node aborts reshaping then we continue the
- * reshaping, so set j again to avoid restart reshape from the
- * first beginning
- */
- if (mddev_is_clustered(mddev) &&
- mddev->reshape_position != MaxSector)
- j = mddev->reshape_position;
- } else {
- /* recovery follows the physical size of devices */
- max_sectors = mddev->dev_sectors;
- j = MaxSector;
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- !test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < j)
- j = rdev->recovery_offset;
- rcu_read_unlock();
-
- /* If there is a bitmap, we need to make sure all
- * writes that started before we added a spare
- * complete before we start doing a recovery.
- * Otherwise the write might complete and (via
- * bitmap_endwrite) set a bit in the bitmap after the
- * recovery has checked that bit and skipped that
- * region.
- */
- if (mddev->bitmap) {
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- }
- }
+ max_sectors = md_sync_max_sectors(mddev, action);
+ j = md_sync_position(mddev, action);
pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
@@ -9095,7 +9118,8 @@ void md_do_sync(struct md_thread *thread)
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;
- sectors = mddev->pers->sync_request(mddev, j, &skipped);
+ sectors = mddev->pers->sync_request(mddev, j, max_sectors,
+ &skipped);
if (sectors == 0) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
break;
@@ -9185,7 +9209,7 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync_completed = mddev->curr_resync;
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
- mddev->pers->sync_request(mddev, max_sectors, &skipped);
+ mddev->pers->sync_request(mddev, max_sectors, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
mddev->curr_resync > MD_RESYNC_ACTIVE) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ca085ecad504..a0d6827dced9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -34,6 +34,61 @@
*/
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
+/* Status of sync thread. */
+enum sync_action {
+ /*
+ * Represented by MD_RECOVERY_SYNC, started:
+ * 1) after assembly, to sync data from the first rdev to the other
+ * copies; this must be done before any other sync action and will
+ * only execute once;
+ * 2) when the array is resized (note that this is not a reshape), to
+ * sync data for the new range;
+ */
+ ACTION_RESYNC,
+ /*
+ * Represented by MD_RECOVERY_RECOVER, started:
+ * 1) for a new replacement, to sync data based on the rdev being
+ * replaced or on available copies from other rdevs;
+ * 2) for a new member disk while the array is degraded, to sync data
+ * from the other rdevs;
+ * 3) on reassembly after a power failure, or when re-adding a hot
+ * removed rdev, to sync data from the first rdev to the other copies
+ * based on the bitmap;
+ */
+ ACTION_RECOVER,
+ /*
+ * Represented by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED |
+ * MD_RECOVERY_CHECK, started when the user echoes "check" to the
+ * sysfs api sync_action; used to check whether the data copies on
+ * different rdevs are the same. The number of mismatched sectors is
+ * exported to the user through the sysfs api mismatch_cnt;
+ */
+ ACTION_CHECK,
+ /*
+ * Represented by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED, started
+ * when the user echoes "repair" to the sysfs api sync_action; usually
+ * paired with ACTION_CHECK, and used to force a data sync once the
+ * user has found inconsistent data;
+ */
+ ACTION_REPAIR,
+ /*
+ * Represented by MD_RECOVERY_RESHAPE, started when a new member disk
+ * is added to the conf; note that this is different from spares or
+ * replacements;
+ */
+ ACTION_RESHAPE,
+ /*
+ * Represented by MD_RECOVERY_FROZEN, can be set through the sysfs api
+ * sync_action or internally (for example when setting the array
+ * read-only); forbids all of the above actions.
+ */
+ ACTION_FROZEN,
+ /*
+ * None of the above actions match.
+ */
+ ACTION_IDLE,
+ NR_SYNC_ACTIONS,
+};
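
For quick reference, the flag combinations and the action that
md_sync_action() (earlier in this diff) derives from them; frozen
always wins:

  MD_RECOVERY_SYNC                                             -> ACTION_RESYNC
  MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED                     -> ACTION_REPAIR
  MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED | MD_RECOVERY_CHECK -> ACTION_CHECK
  MD_RECOVERY_RECOVER                                          -> ACTION_RECOVER
  MD_RECOVERY_RESHAPE (or reshape_position != MaxSector)       -> ACTION_RESHAPE
  MD_RECOVERY_FROZEN                                           -> ACTION_FROZEN
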
+
/*
* The struct embedded in rdev is used to serialize IO.
*/
@@ -371,13 +426,12 @@ struct mddev {
struct md_thread __rcu *thread; /* management thread */
struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */
- /* 'last_sync_action' is initialized to "none". It is set when a
- * sync operation (i.e "data-check", "requested-resync", "resync",
- * "recovery", or "reshape") is started. It holds this value even
+ /*
+ * Set when a sync operation is started. It holds this value even
* when the sync thread is "frozen" (interrupted) or "idle" (stopped
- * or finished). It is overwritten when a new sync operation is begun.
+ * or finished). It is overwritten when a new sync operation is begun.
*/
- char *last_sync_action;
+ enum sync_action last_sync_action;
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
@@ -540,8 +594,6 @@ struct mddev {
*/
struct list_head deleting;
- /* Used to synchronize idle and frozen for action_store() */
- struct mutex sync_mutex;
/* The sequence number for sync thread */
atomic_t sync_seq;
@@ -551,22 +603,46 @@ struct mddev {
};
enum recovery_flags {
+ /* flags for sync thread running status */
+
+ /*
+ * set when one of the sync actions is set and a new sync thread needs
+ * to be registered, or to just add/remove spares from the conf.
+ */
+ MD_RECOVERY_NEEDED,
+ /* sync thread is running, or about to be started */
+ MD_RECOVERY_RUNNING,
+ /* sync thread needs to be aborted for some reason */
+ MD_RECOVERY_INTR,
+ /* sync thread is done and is waiting to be unregistered */
+ MD_RECOVERY_DONE,
+ /* running sync thread must abort immediately, and not restart */
+ MD_RECOVERY_FROZEN,
+ /* waiting for pers->start() to finish */
+ MD_RECOVERY_WAIT,
+ /* interrupted because io-error */
+ MD_RECOVERY_ERROR,
+
+ /* flags that determine the sync action, see details in enum sync_action */
+
+ /* if just this flag is set, action is resync. */
+ MD_RECOVERY_SYNC,
+ /*
+ * paired with MD_RECOVERY_SYNC; if MD_RECOVERY_CHECK is not set,
+ * the action is repair, meaning a user-requested resync.
+ */
+ MD_RECOVERY_REQUESTED,
/*
- * If neither SYNC or RESHAPE are set, then it is a recovery.
+ * paired with MD_RECOVERY_SYNC and MD_RECOVERY_REQUESTED, action is
+ * check.
*/
- MD_RECOVERY_RUNNING, /* a thread is running, or about to be started */
- MD_RECOVERY_SYNC, /* actually doing a resync, not a recovery */
- MD_RECOVERY_RECOVER, /* doing recovery, or need to try it. */
- MD_RECOVERY_INTR, /* resync needs to be aborted for some reason */
- MD_RECOVERY_DONE, /* thread is done and is waiting to be reaped */
- MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */
- MD_RECOVERY_REQUESTED, /* user-space has requested a sync (used with SYNC) */
- MD_RECOVERY_CHECK, /* user-space request for check-only, no repair */
- MD_RECOVERY_RESHAPE, /* A reshape is happening */
- MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
- MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
- MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
- MD_RESYNCING_REMOTE, /* remote node is running resync thread */
+ MD_RECOVERY_CHECK,
+ /* recovery, or need to try it */
+ MD_RECOVERY_RECOVER,
+ /* reshape */
+ MD_RECOVERY_RESHAPE,
+ /* remote node is running resync thread */
+ MD_RESYNCING_REMOTE,
};
enum md_ro_state {
@@ -653,7 +729,8 @@ struct md_personality
int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*spare_active) (struct mddev *mddev);
- sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
+ sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr,
+ sector_t max_sector, int *skipped);
int (*resize) (struct mddev *mddev, sector_t sectors);
sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
int (*check_reshape) (struct mddev *mddev);
@@ -772,7 +849,7 @@ static inline void safe_put_page(struct page *p)
extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
-extern int register_md_cluster_operations(struct md_cluster_operations *ops,
+extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
@@ -785,7 +862,10 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
-extern bool md_write_start(struct mddev *mddev, struct bio *bi);
+extern enum sync_action md_sync_action(struct mddev *mddev);
+extern enum sync_action md_sync_action_by_name(const char *page);
+extern const char *md_sync_action_name(enum sync_action action);
+extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
@@ -809,11 +889,11 @@ extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
-extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
extern int mddev_init(struct mddev *mddev);
extern void mddev_destroy(struct mddev *mddev);
+void md_init_stacking_limits(struct queue_limits *lim);
struct mddev *md_alloc(dev_t dev, char *name);
void mddev_put(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
@@ -852,7 +932,7 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}
-extern struct md_cluster_operations *md_cluster_ops;
+extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
@@ -908,7 +988,9 @@ void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);
-void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
+#define MDDEV_STACK_INTEGRITY (1u << 0)
+int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
+ unsigned int flags);
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c5d4aeb68404..32d587524778 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -365,30 +365,30 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors;
}
-static void free_conf(struct mddev *mddev, struct r0conf *conf)
-{
- kfree(conf->strip_zone);
- kfree(conf->devlist);
- kfree(conf);
-}
-
static void raid0_free(struct mddev *mddev, void *priv)
{
struct r0conf *conf = priv;
- free_conf(mddev, conf);
+ kfree(conf->strip_zone);
+ kfree(conf->devlist);
+ kfree(conf);
}
static int raid0_set_limits(struct mddev *mddev)
{
struct queue_limits lim;
+ int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_hw_sectors = mddev->chunk_sectors;
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
- mddev_stack_rdev_limits(mddev, &lim);
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err) {
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return err;
+ }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
@@ -415,7 +415,7 @@ static int raid0_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid0_set_limits(mddev);
if (ret)
- goto out_free_conf;
+ return ret;
}
/* calculate array device size */
@@ -427,13 +427,7 @@ static int raid0_run(struct mddev *mddev)
dump_zones(mddev);
- ret = md_integrity_register(mddev);
- if (ret)
- goto out_free_conf;
- return 0;
-out_free_conf:
- free_conf(mddev, conf);
- return ret;
+ return md_integrity_register(mddev);
}
/*
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7b8a71ca66dd..04a0c2ca1732 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1687,8 +1687,7 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
if (bio_data_dir(bio) == READ)
raid1_read_request(mddev, bio, sectors, NULL);
else {
- if (!md_write_start(mddev,bio))
- return false;
+ md_write_start(mddev, bio);
raid1_write_request(mddev, bio, sectors);
}
return true;
@@ -1907,9 +1906,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (mddev->recovery_disabled == conf->recovery_disabled)
return -EBUSY;
- if (md_integrity_add_rdev(rdev, mddev))
- return -ENXIO;
-
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
@@ -2757,12 +2753,12 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
*/
static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
- int *skipped)
+ sector_t max_sector, int *skipped)
{
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
struct bio *bio;
- sector_t max_sector, nr_sectors;
+ sector_t nr_sectors;
int disk = -1;
int i;
int wonly = -1;
@@ -2778,7 +2774,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (init_resync(conf))
return 0;
- max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
/* If we aborted, we need to abort the
* sync on the 'current' bitmap chunk (there will
@@ -3197,14 +3192,18 @@ static struct r1conf *setup_conf(struct mddev *mddev)
static int raid1_set_limits(struct mddev *mddev)
{
struct queue_limits lim;
+ int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
- mddev_stack_rdev_limits(mddev, &lim);
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err) {
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return err;
+ }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
-static void raid1_free(struct mddev *mddev, void *priv);
static int raid1_run(struct mddev *mddev)
{
struct r1conf *conf;
@@ -3238,7 +3237,7 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
if (ret)
- goto abort;
+ return ret;
}
mddev->degraded = 0;
@@ -3252,8 +3251,7 @@ static int raid1_run(struct mddev *mddev)
*/
if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(mddev, &conf->thread);
- ret = -EINVAL;
- goto abort;
+ return -EINVAL;
}
if (conf->raid_disks - mddev->degraded == 1)
@@ -3277,14 +3275,8 @@ static int raid1_run(struct mddev *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
ret = md_integrity_register(mddev);
- if (ret) {
+ if (ret)
md_unregister_thread(mddev, &mddev->thread);
- goto abort;
- }
- return 0;
-
-abort:
- raid1_free(mddev, conf);
return ret;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a4556d2e46bf..2a9c4ee982e0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1836,8 +1836,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
&& md_flush_request(mddev, bio))
return true;
- if (!md_write_start(mddev, bio))
- return false;
+ md_write_start(mddev, bio);
if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
if (!raid10_handle_discard(mddev, bio))
@@ -2083,9 +2082,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
return -EINVAL;
- if (md_integrity_add_rdev(rdev, mddev))
- return -ENXIO;
-
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
@@ -3140,12 +3136,12 @@ static void raid10_set_cluster_sync_high(struct r10conf *conf)
*/
static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
- int *skipped)
+ sector_t max_sector, int *skipped)
{
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
struct bio *biolist = NULL, *bio;
- sector_t max_sector, nr_sectors;
+ sector_t nr_sectors;
int i;
int max_sync;
sector_t sync_blocks;
@@ -3175,10 +3171,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
return 0;
skipped:
- max_sector = mddev->dev_sectors;
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
- test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
conf->cluster_sync_low = 0;
conf->cluster_sync_high = 0;
@@ -3980,12 +3972,17 @@ static int raid10_set_queue_limits(struct mddev *mddev)
{
struct r10conf *conf = mddev->private;
struct queue_limits lim;
+ int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
- mddev_stack_rdev_limits(mddev, &lim);
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err) {
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return err;
+ }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2bd1ce9b3922..c14cf2410365 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -155,7 +155,7 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
return slot;
}
-static void print_raid5_conf (struct r5conf *conf);
+static void print_raid5_conf(struct r5conf *conf);
static int stripe_operations_active(struct stripe_head *sh)
{
@@ -5899,6 +5899,39 @@ out:
return ret;
}
+enum reshape_loc {
+ LOC_NO_RESHAPE,
+ LOC_AHEAD_OF_RESHAPE,
+ LOC_INSIDE_RESHAPE,
+ LOC_BEHIND_RESHAPE,
+};
+
+static enum reshape_loc get_reshape_loc(struct mddev *mddev,
+ struct r5conf *conf, sector_t logical_sector)
+{
+ sector_t reshape_progress, reshape_safe;
+ /*
+ * A spinlock is needed as reshape_progress may be
+ * 64bit on a 32bit platform, and so it might be
+ * possible to see a half-updated value.
+ * Of course reshape_progress could change after
+ * the lock is dropped, so once we get a reference
+ * to the stripe that we think it is, we will have
+ * to check again.
+ */
+ spin_lock_irq(&conf->device_lock);
+ reshape_progress = conf->reshape_progress;
+ reshape_safe = conf->reshape_safe;
+ spin_unlock_irq(&conf->device_lock);
+ if (reshape_progress == MaxSector)
+ return LOC_NO_RESHAPE;
+ if (ahead_of_reshape(mddev, logical_sector, reshape_progress))
+ return LOC_AHEAD_OF_RESHAPE;
+ if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
+ return LOC_INSIDE_RESHAPE;
+ return LOC_BEHIND_RESHAPE;
+}
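
A condensed sketch of how the two call sites below consume the
tri-state result (a fragment, not a complete function; `previous` is
the local from make_stripe_request()):

switch (get_reshape_loc(mddev, conf, logical_sector)) {
case LOC_AHEAD_OF_RESHAPE:
	previous = 1;	/* map through the pre-reshape geometry */
	break;
case LOC_INSIDE_RESHAPE:
	return STRIPE_SCHEDULE_AND_RETRY;	/* retry once reshape_safe advances */
default:
	break;		/* no reshape, or already behind it: new geometry */
}
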
+
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi)
@@ -5913,28 +5946,14 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
seq = read_seqcount_begin(&conf->gen_lock);
if (unlikely(conf->reshape_progress != MaxSector)) {
- /*
- * Spinlock is needed as reshape_progress may be
- * 64bit on a 32bit platform, and so it might be
- * possible to see a half-updated value
- * Of course reshape_progress could change after
- * the lock is dropped, so once we get a reference
- * to the stripe that we think it is, we will have
- * to check again.
- */
- spin_lock_irq(&conf->device_lock);
- if (ahead_of_reshape(mddev, logical_sector,
- conf->reshape_progress)) {
- previous = 1;
- } else {
- if (ahead_of_reshape(mddev, logical_sector,
- conf->reshape_safe)) {
- spin_unlock_irq(&conf->device_lock);
- ret = STRIPE_SCHEDULE_AND_RETRY;
- goto out;
- }
+ enum reshape_loc loc = get_reshape_loc(mddev, conf,
+ logical_sector);
+ if (loc == LOC_INSIDE_RESHAPE) {
+ ret = STRIPE_SCHEDULE_AND_RETRY;
+ goto out;
}
- spin_unlock_irq(&conf->device_lock);
+ if (loc == LOC_AHEAD_OF_RESHAPE)
+ previous = 1;
}
new_sector = raid5_compute_sector(conf, logical_sector, previous,
@@ -6078,8 +6097,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
}
- if (!md_write_start(mddev, bi))
- return false;
+ md_write_start(mddev, bi);
/*
* If array is degraded, better not do chunk aligned read because
* later we might have to read it again in order to reconstruct
@@ -6113,8 +6131,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
- !ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
- ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
+ get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
bio_wouldblock_error(bi);
if (rw == WRITE)
md_write_end(mddev);
@@ -6255,7 +6272,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->reshape_backwards) {
- BUG_ON(writepos < reshape_sectors);
+ if (WARN_ON(writepos < reshape_sectors))
+ return MaxSector;
+
writepos -= reshape_sectors;
readpos += reshape_sectors;
safepos += reshape_sectors;
@@ -6273,14 +6292,18 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
* to set 'stripe_addr' which is where we will write to.
*/
if (mddev->reshape_backwards) {
- BUG_ON(conf->reshape_progress == 0);
+ if (WARN_ON(conf->reshape_progress == 0))
+ return MaxSector;
+
stripe_addr = writepos;
- BUG_ON((mddev->dev_sectors &
- ~((sector_t)reshape_sectors - 1))
- - reshape_sectors - stripe_addr
- != sector_nr);
+ if (WARN_ON((mddev->dev_sectors &
+ ~((sector_t)reshape_sectors - 1)) -
+ reshape_sectors - stripe_addr != sector_nr))
+ return MaxSector;
} else {
- BUG_ON(writepos != sector_nr + reshape_sectors);
+ if (WARN_ON(writepos != sector_nr + reshape_sectors))
+ return MaxSector;
+
stripe_addr = sector_nr;
}
@@ -6458,11 +6481,10 @@ ret:
}
static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
- int *skipped)
+ sector_t max_sector, int *skipped)
{
struct r5conf *conf = mddev->private;
struct stripe_head *sh;
- sector_t max_sector = mddev->dev_sectors;
sector_t sync_blocks;
int still_degraded = 0;
int i;
@@ -7082,12 +7104,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
err = -ENODEV;
else if (new != conf->skip_copy) {
struct request_queue *q = mddev->gendisk->queue;
+ struct queue_limits lim = queue_limits_start_update(q);
conf->skip_copy = new;
if (new)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
+ lim.features |= BLK_FEAT_STABLE_WRITES;
else
- blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
+ lim.features &= ~BLK_FEAT_STABLE_WRITES;
+ err = queue_limits_commit_update(q, &lim);
}
mddev_unlock_and_resume(mddev);
return err ?: len;
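
The skip_copy conversion above follows the general pattern for atomic
queue-limit updates; in isolation, with q, enable and err assumed to be
locals (a sketch of the API usage shown in the hunk, not new code):

struct queue_limits lim = queue_limits_start_update(q);	/* locks the limits */

if (enable)
	lim.features |= BLK_FEAT_STABLE_WRITES;
else
	lim.features &= ~BLK_FEAT_STABLE_WRITES;

err = queue_limits_commit_update(q, &lim);	/* validates and publishes */
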
@@ -7562,11 +7586,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (test_bit(Replacement, &rdev->flags)) {
if (disk->replacement)
goto abort;
- RCU_INIT_POINTER(disk->replacement, rdev);
+ disk->replacement = rdev;
} else {
if (disk->rdev)
goto abort;
- RCU_INIT_POINTER(disk->rdev, rdev);
+ disk->rdev = rdev;
}
if (test_bit(In_sync, &rdev->flags)) {
@@ -7702,13 +7726,13 @@ static int raid5_set_limits(struct mddev *mddev)
*/
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
- lim.raid_partial_stripes_expensive = 1;
+ lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
lim.discard_granularity = stripe;
lim.max_write_zeroes_sectors = 0;
- mddev_stack_rdev_limits(mddev, &lim);
+ mddev_stack_rdev_limits(mddev, &lim, 0);
rdev_for_each(rdev, mddev)
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
mddev->gendisk->disk_name);
@@ -8048,7 +8072,7 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "]");
}
-static void print_raid5_conf (struct r5conf *conf)
+static void print_raid5_conf(struct r5conf *conf)
{
struct md_rdev *rdev;
int i;
@@ -8062,15 +8086,13 @@ static void print_raid5_conf (struct r5conf *conf)
conf->raid_disks,
conf->raid_disks - conf->mddev->degraded);
- rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
- rdev = rcu_dereference(conf->disks[i].rdev);
+ rdev = conf->disks[i].rdev;
if (rdev)
pr_debug(" disk %d, o:%d, dev:%pg\n",
i, !test_bit(Faulty, &rdev->flags),
rdev->bdev);
}
- rcu_read_unlock();
}
static int raid5_spare_active(struct mddev *mddev)
diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c
index c8a33e1e910c..06090cc0a476 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c
@@ -943,7 +943,7 @@ ipu6_isys_query_stream_by_source(struct ipu6_isys *isys, int source, u8 vc)
return NULL;
if (source < 0) {
- dev_err(&stream->isys->adev->auxdev.dev,
+ dev_err(&isys->adev->auxdev.dev,
"query stream with invalid port number\n");
return NULL;
}
diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c
index 8b9b77719bb1..c4aff2e2009b 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c
@@ -799,7 +799,7 @@ static int isys_register_devices(struct ipu6_isys *isys)
isys->v4l2_dev.mdev = &isys->media_dev;
isys->v4l2_dev.ctrl_handler = NULL;
- ret = v4l2_device_register(&pdev->dev, &isys->v4l2_dev);
+ ret = v4l2_device_register(dev, &isys->v4l2_dev);
if (ret < 0)
goto out_media_device_unregister;
diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig
index 407a800c81bc..a7d9607ecdc6 100644
--- a/drivers/media/pci/intel/ivsc/Kconfig
+++ b/drivers/media/pci/intel/ivsc/Kconfig
@@ -4,6 +4,7 @@
config INTEL_VSC
tristate "Intel Visual Sensing Controller"
depends on INTEL_MEI && ACPI && VIDEO_DEV
+ depends on IPU_BRIDGE || !IPU_BRIDGE
select MEDIA_CONTROLLER
select VIDEO_V4L2_SUBDEV_API
select V4L2_FWNODE
diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
index f2c0f144c0fc..dacd3c96c9f5 100644
--- a/drivers/mfd/axp20x.c
+++ b/drivers/mfd/axp20x.c
@@ -210,6 +210,7 @@ static const struct regmap_access_table axp313a_volatile_table = {
static const struct regmap_range axp717_writeable_ranges[] = {
regmap_reg_range(AXP717_IRQ0_EN, AXP717_IRQ4_EN),
+ regmap_reg_range(AXP717_IRQ0_STATE, AXP717_IRQ4_STATE),
regmap_reg_range(AXP717_DCDC_OUTPUT_CONTROL, AXP717_CPUSLDO_CONTROL),
};
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 4c67e2c5a82e..a7a2bcedb37e 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1238,6 +1238,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
struct fastrpc_phy_page pages[1];
char *name;
int err;
+ bool scm_done = false;
struct {
int pgid;
u32 namelen;
@@ -1289,6 +1290,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err);
goto err_map;
}
+ scm_done = true;
}
}
@@ -1320,10 +1322,11 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
goto err_invoke;
kfree(args);
+ kfree(name);
return 0;
err_invoke:
- if (fl->cctx->vmcount) {
+ if (fl->cctx->vmcount && scm_done) {
u64 src_perms = 0;
struct qcom_scm_vmperm dst_perms;
u32 i;
@@ -1693,16 +1696,20 @@ static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr
{
struct fastrpc_invoke_args args[2] = { 0 };
- /* Capability filled in userspace */
+ /*
+	 * Capability filled in userspace. This carries information about
+	 * remoteproc support, which userspace fetches from the remoteproc
+	 * sysfs node.
+ */
dsp_attr_buf[0] = 0;
+ dsp_attr_buf_len -= 1;
args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len;
args[0].length = sizeof(dsp_attr_buf_len);
args[0].fd = -1;
args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1];
- args[1].length = dsp_attr_buf_len;
+ args[1].length = dsp_attr_buf_len * sizeof(u32);
args[1].fd = -1;
- fl->pd = USER_PD;
return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE,
FASTRPC_SCALARS(0, 1, 1), args);
@@ -1730,7 +1737,7 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
if (!dsp_attributes)
return -ENOMEM;
- err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN);
+ err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES);
if (err == DSP_UNSUPPORTED_API) {
dev_info(&cctx->rpdev->dev,
"Warning: DSP capabilities not supported on domain: %d\n", domain);
@@ -1783,7 +1790,7 @@ static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
if (err)
return err;
- if (copy_to_user(argp, &cap.capability, sizeof(cap.capability)))
+ if (copy_to_user(argp, &cap, sizeof(cap)))
return -EFAULT;
return 0;
@@ -2080,6 +2087,16 @@ err_invoke:
return err;
}
+static int is_attach_rejected(struct fastrpc_user *fl)
+{
+ /* Check if the device node is non-secure */
+ if (!fl->is_secure_dev) {
+ dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n");
+ return -EACCES;
+ }
+ return 0;
+}
+
static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -2092,13 +2109,19 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
err = fastrpc_invoke(fl, argp);
break;
case FASTRPC_IOCTL_INIT_ATTACH:
- err = fastrpc_init_attach(fl, ROOT_PD);
+ err = is_attach_rejected(fl);
+ if (!err)
+ err = fastrpc_init_attach(fl, ROOT_PD);
break;
case FASTRPC_IOCTL_INIT_ATTACH_SNS:
- err = fastrpc_init_attach(fl, SENSORS_PD);
+ err = is_attach_rejected(fl);
+ if (!err)
+ err = fastrpc_init_attach(fl, SENSORS_PD);
break;
case FASTRPC_IOCTL_INIT_CREATE_STATIC:
- err = fastrpc_init_create_static_process(fl, argp);
+ err = is_attach_rejected(fl);
+ if (!err)
+ err = fastrpc_init_create_static_process(fl, argp);
break;
case FASTRPC_IOCTL_INIT_CREATE:
err = fastrpc_init_create_process(fl, argp);
diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c
index 16695cb5e69c..7c3d8bedf90b 100644
--- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c
+++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c
@@ -153,7 +153,6 @@ static int pci1xxxx_eeprom_read(void *priv_t, unsigned int off,
buf[byte] = readl(rb + MMAP_EEPROM_OFFSET(EEPROM_DATA_REG));
}
- ret = byte;
error:
release_sys_lock(priv);
return ret;
@@ -197,7 +196,6 @@ static int pci1xxxx_eeprom_write(void *priv_t, unsigned int off,
goto error;
}
}
- ret = byte;
error:
release_sys_lock(priv);
return ret;
@@ -258,7 +256,6 @@ static int pci1xxxx_otp_read(void *priv_t, unsigned int off,
buf[byte] = readl(rb + MMAP_OTP_OFFSET(OTP_RD_DATA_OFFSET));
}
- ret = byte;
error:
release_sys_lock(priv);
return ret;
@@ -315,7 +312,6 @@ static int pci1xxxx_otp_write(void *priv_t, unsigned int off,
goto error;
}
}
- ret = byte;
error:
release_sys_lock(priv);
return ret;
diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c
index 1ec65d87488a..d02f6e881139 100644
--- a/drivers/misc/mei/platform-vsc.c
+++ b/drivers/misc/mei/platform-vsc.c
@@ -28,8 +28,8 @@
#define MEI_VSC_MAX_MSG_SIZE 512
-#define MEI_VSC_POLL_DELAY_US (50 * USEC_PER_MSEC)
-#define MEI_VSC_POLL_TIMEOUT_US (200 * USEC_PER_MSEC)
+#define MEI_VSC_POLL_DELAY_US (100 * USEC_PER_MSEC)
+#define MEI_VSC_POLL_TIMEOUT_US (400 * USEC_PER_MSEC)
#define mei_dev_to_vsc_hw(dev) ((struct mei_vsc_hw *)((dev)->hw))
diff --git a/drivers/misc/mei/vsc-fw-loader.c b/drivers/misc/mei/vsc-fw-loader.c
index 596a9d695dfc..084d0205f97d 100644
--- a/drivers/misc/mei/vsc-fw-loader.c
+++ b/drivers/misc/mei/vsc-fw-loader.c
@@ -204,7 +204,7 @@ struct vsc_img_frag {
/**
* struct vsc_fw_loader - represent vsc firmware loader
- * @dev: device used to request fimware
+ * @dev: device used to request firmware
* @tp: transport layer used with the firmware loader
* @csi: CSI image
* @ace: ACE image
diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c
index e6a98dba8a73..1618cca9a731 100644
--- a/drivers/misc/mei/vsc-tp.c
+++ b/drivers/misc/mei/vsc-tp.c
@@ -331,12 +331,12 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len)
return ret;
}
- ret = vsc_tp_dev_xfer(tp, tp->tx_buf, tp->rx_buf, len);
+ ret = vsc_tp_dev_xfer(tp, tp->tx_buf, ibuf ? tp->rx_buf : NULL, len);
if (ret)
return ret;
if (ibuf)
- cpu_to_be32_array(ibuf, tp->rx_buf, words);
+ be32_to_cpu_array(ibuf, tp->rx_buf, words);
return ret;
}
@@ -568,6 +568,19 @@ static void vsc_tp_remove(struct spi_device *spi)
free_irq(spi->irq, tp);
}
+static void vsc_tp_shutdown(struct spi_device *spi)
+{
+ struct vsc_tp *tp = spi_get_drvdata(spi);
+
+ platform_device_unregister(tp->pdev);
+
+ mutex_destroy(&tp->mutex);
+
+ vsc_tp_reset(tp);
+
+ free_irq(spi->irq, tp);
+}
+
static const struct acpi_device_id vsc_tp_acpi_ids[] = {
{ "INTC1009" }, /* Raptor Lake */
{ "INTC1058" }, /* Tiger Lake */
@@ -580,6 +593,7 @@ MODULE_DEVICE_TABLE(acpi, vsc_tp_acpi_ids);
static struct spi_driver vsc_tp_driver = {
.probe = vsc_tp_probe,
.remove = vsc_tp_remove,
+ .shutdown = vsc_tp_shutdown,
.driver = {
.name = "vsc-tp",
.acpi_match_table = vsc_tp_acpi_ids,
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 367509b5b646..2c9963248fcb 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2466,8 +2466,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
struct mmc_blk_data *md;
int devidx, ret;
char cap_str[10];
- bool cache_enabled = false;
- bool fua_enabled = false;
+ unsigned int features = 0;
devidx = ida_alloc_max(&mmc_blk_ida, max_devices - 1, GFP_KERNEL);
if (devidx < 0) {
@@ -2499,7 +2498,24 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
*/
md->read_only = mmc_blk_readonly(card);
- md->disk = mmc_init_queue(&md->queue, card);
+ if (mmc_host_cmd23(card->host)) {
+ if ((mmc_card_mmc(card) &&
+ card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
+ (mmc_card_sd(card) &&
+ card->scr.cmds & SD_SCR_CMD23_SUPPORT))
+ md->flags |= MMC_BLK_CMD23;
+ }
+
+ if (md->flags & MMC_BLK_CMD23 &&
+ ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
+ card->ext_csd.rel_sectors)) {
+ md->flags |= MMC_BLK_REL_WR;
+ features |= (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
+ } else if (mmc_cache_enabled(card->host)) {
+ features |= BLK_FEAT_WRITE_CACHE;
+ }
+
+ md->disk = mmc_init_queue(&md->queue, card, features);
if (IS_ERR(md->disk)) {
ret = PTR_ERR(md->disk);
goto err_kfree;
@@ -2539,26 +2555,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
set_capacity(md->disk, size);
- if (mmc_host_cmd23(card->host)) {
- if ((mmc_card_mmc(card) &&
- card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
- (mmc_card_sd(card) &&
- card->scr.cmds & SD_SCR_CMD23_SUPPORT))
- md->flags |= MMC_BLK_CMD23;
- }
-
- if (md->flags & MMC_BLK_CMD23 &&
- ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
- card->ext_csd.rel_sectors)) {
- md->flags |= MMC_BLK_REL_WR;
- fua_enabled = true;
- cache_enabled = true;
- }
- if (mmc_cache_enabled(card->host))
- cache_enabled = true;
-
- blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled);
-
string_get_size((u64)size, 512, STRING_UNITS_2,
cap_str, sizeof(cap_str));
pr_info("%s: %s %s %s%s\n",
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 241cdc2b2a2a..d0b3ca8a11f0 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -344,10 +344,12 @@ static const struct blk_mq_ops mmc_mq_ops = {
};
static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
- struct mmc_card *card)
+ struct mmc_card *card, unsigned int features)
{
struct mmc_host *host = card->host;
- struct queue_limits lim = { };
+ struct queue_limits lim = {
+ .features = features,
+ };
struct gendisk *disk;
if (mmc_can_erase(card))
@@ -376,18 +378,16 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
lim.max_segments = host->max_segs;
}
+ if (mmc_host_is_spi(host) && host->use_spi_crc)
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+
disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
if (IS_ERR(disk))
return disk;
mq->queue = disk->queue;
- if (mmc_host_is_spi(host) && host->use_spi_crc)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
blk_queue_rq_timeout(mq->queue, 60 * HZ);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
-
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler);
@@ -413,10 +413,12 @@ static inline bool mmc_merge_capable(struct mmc_host *host)
* mmc_init_queue - initialise a queue structure.
* @mq: mmc queue
* @card: mmc card to attach this queue
+ * @features: block layer features (BLK_FEAT_*)
*
* Initialise a MMC card request queue.
*/
-struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
+struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
+ unsigned int features)
{
struct mmc_host *host = card->host;
struct gendisk *disk;
@@ -460,7 +462,7 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
return ERR_PTR(ret);
- disk = mmc_alloc_disk(mq, card);
+ disk = mmc_alloc_disk(mq, card, features);
if (IS_ERR(disk))
blk_mq_free_tag_set(&mq->tag_set);
return disk;
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index 9ade3bcbb714..1498840a4ea0 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -94,7 +94,8 @@ struct mmc_queue {
struct work_struct complete_work;
};
-struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card);
+struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
+ unsigned int features);
extern void mmc_cleanup_queue(struct mmc_queue *);
extern void mmc_queue_suspend(struct mmc_queue *);
extern void mmc_queue_resume(struct mmc_queue *);
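
Net effect of the three mmc hunks above: write-cache, FUA and stable-write behaviour is decided before the disk exists and flows into blk_mq_alloc_disk() through queue_limits.features, replacing post-allocation calls such as blk_queue_write_cache() and blk_queue_flag_set(). A minimal sketch of the allocation-time pattern (the example_* name is illustrative):

static struct gendisk *example_alloc_disk(struct blk_mq_tag_set *set,
					  unsigned int features)
{
	struct queue_limits lim = {
		/* e.g. BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, known up front */
		.features = features,
	};

	return blk_mq_alloc_disk(set, &lim, NULL);
}
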
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index d7427894e0bc..c302eb380e42 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -224,6 +224,9 @@ static void davinci_fifo_data_trans(struct mmc_davinci_host *host,
}
p = sgm->addr;
+ if (n > sgm->length)
+ n = sgm->length;
+
/* NOTE: we never transfer more than rw_threshold bytes
* to/from the fifo here; there's no I/O overlap.
* This also assumes that access width( i.e. ACCWD) is 4 bytes
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 9a5f75163aca..8ede4ce93271 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -131,10 +131,12 @@ struct moxart_host {
struct dma_async_tx_descriptor *tx_desc;
struct mmc_host *mmc;
struct mmc_request *mrq;
+ struct scatterlist *cur_sg;
struct completion dma_complete;
struct completion pio_complete;
- struct sg_mapping_iter sg_miter;
+ u32 num_sg;
+ u32 data_remain;
u32 data_len;
u32 fifo_width;
u32 timeout;
@@ -146,6 +148,35 @@ struct moxart_host {
bool is_removed;
};
+static inline void moxart_init_sg(struct moxart_host *host,
+ struct mmc_data *data)
+{
+ host->cur_sg = data->sg;
+ host->num_sg = data->sg_len;
+ host->data_remain = host->cur_sg->length;
+
+ if (host->data_remain > host->data_len)
+ host->data_remain = host->data_len;
+}
+
+static inline int moxart_next_sg(struct moxart_host *host)
+{
+ int remain;
+ struct mmc_data *data = host->mrq->cmd->data;
+
+ host->cur_sg++;
+ host->num_sg--;
+
+ if (host->num_sg > 0) {
+ host->data_remain = host->cur_sg->length;
+ remain = host->data_len - data->bytes_xfered;
+ if (remain > 0 && remain < host->data_remain)
+ host->data_remain = remain;
+ }
+
+ return host->num_sg;
+}
+
static int moxart_wait_for_status(struct moxart_host *host,
u32 mask, u32 *status)
{
@@ -278,29 +309,14 @@ static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
static void moxart_transfer_pio(struct moxart_host *host)
{
- struct sg_mapping_iter *sgm = &host->sg_miter;
struct mmc_data *data = host->mrq->cmd->data;
u32 *sgp, len = 0, remain, status;
if (host->data_len == data->bytes_xfered)
return;
- /*
- * By updating sgm->consumes this will get a proper pointer into the
- * buffer at any time.
- */
- if (!sg_miter_next(sgm)) {
- /* This shold not happen */
- dev_err(mmc_dev(host->mmc), "ran out of scatterlist prematurely\n");
- data->error = -EINVAL;
- complete(&host->pio_complete);
- return;
- }
- sgp = sgm->addr;
- remain = sgm->length;
- if (remain > host->data_len)
- remain = host->data_len;
- sgm->consumed = 0;
+ sgp = sg_virt(host->cur_sg);
+ remain = host->data_remain;
if (data->flags & MMC_DATA_WRITE) {
while (remain > 0) {
@@ -315,7 +331,6 @@ static void moxart_transfer_pio(struct moxart_host *host)
sgp++;
len += 4;
}
- sgm->consumed += len;
remain -= len;
}
@@ -332,22 +347,22 @@ static void moxart_transfer_pio(struct moxart_host *host)
sgp++;
len += 4;
}
- sgm->consumed += len;
remain -= len;
}
}
- data->bytes_xfered += sgm->consumed;
- if (host->data_len == data->bytes_xfered) {
+ data->bytes_xfered += host->data_remain - remain;
+ host->data_remain = remain;
+
+ if (host->data_len != data->bytes_xfered)
+ moxart_next_sg(host);
+ else
complete(&host->pio_complete);
- return;
- }
}
static void moxart_prepare_data(struct moxart_host *host)
{
struct mmc_data *data = host->mrq->cmd->data;
- unsigned int flags = SG_MITER_ATOMIC; /* Used from IRQ */
u32 datactrl;
int blksz_bits;
@@ -358,19 +373,15 @@ static void moxart_prepare_data(struct moxart_host *host)
blksz_bits = ffs(data->blksz) - 1;
BUG_ON(1 << blksz_bits != data->blksz);
+ moxart_init_sg(host, data);
+
datactrl = DCR_DATA_EN | (blksz_bits & DCR_BLK_SIZE);
- if (data->flags & MMC_DATA_WRITE) {
- flags |= SG_MITER_FROM_SG;
+ if (data->flags & MMC_DATA_WRITE)
datactrl |= DCR_DATA_WRITE;
- } else {
- flags |= SG_MITER_TO_SG;
- }
if (moxart_use_dma(host))
datactrl |= DCR_DMA_EN;
- else
- sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
writel(DCR_DATA_FIFO_RESET, host->base + REG_DATA_CONTROL);
writel(MASK_DATA | FIFO_URUN | FIFO_ORUN, host->base + REG_CLEAR);
@@ -443,9 +454,6 @@ static void moxart_request(struct mmc_host *mmc, struct mmc_request *mrq)
}
request_done:
- if (!moxart_use_dma(host))
- sg_miter_stop(&host->sg_miter);
-
spin_unlock_irqrestore(&host->lock, flags);
mmc_request_done(host->mmc, mrq);
}
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index 9053526fa212..150fb477b7cc 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -24,6 +24,7 @@
#define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0)
#define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1)
#define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2)
+#define BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY BIT(4)
#define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0)
#define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1)
@@ -384,6 +385,9 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT)
host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+ if (!(match_priv->flags & BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY))
+ host->mmc_host_ops.card_busy = NULL;
+
/* Change the base clock frequency if the DT property exists */
if (device_property_read_u32(&pdev->dev, "clock-frequency",
&priv->base_freq_hz) != 0)
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index ef89ec382bfe..23e6ba70144c 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1326,7 +1326,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
ret = pci_read_config_byte(chip->pdev, 0xAE, &scratch);
if (ret)
- return ret;
+ goto fail;
/*
* Turn PMOS on [bit 0], set over current detection to 2.4 V
@@ -1337,7 +1337,10 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
else
scratch &= ~0x47;
- return pci_write_config_byte(chip->pdev, 0xAE, scratch);
+ ret = pci_write_config_byte(chip->pdev, 0xAE, scratch);
+
+fail:
+ return pcibios_err_to_errno(ret);
}
static int jmicron_probe(struct sdhci_pci_chip *chip)
@@ -2202,7 +2205,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev,
ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots);
if (ret)
- return ret;
+ return pcibios_err_to_errno(ret);
slots = PCI_SLOT_INFO_SLOTS(slots) + 1;
dev_dbg(&pdev->dev, "found %d slot(s)\n", slots);
@@ -2211,7 +2214,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev,
ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar);
if (ret)
- return ret;
+ return pcibios_err_to_errno(ret);
first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK;
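
Both fixes above address the same bug class: pci_read_config_byte() and friends return positive PCIBIOS_* status codes, not negative errnos, so returning the raw value from a probe path either looks like success or leaks a bogus positive value to the caller. A minimal sketch of the conversion (the example_* name is illustrative):

static int example_read_cfg(struct pci_dev *pdev, int where, u8 *val)
{
	int ret = pci_read_config_byte(pdev, where, val);

	/* Map e.g. PCIBIOS_BAD_REGISTER_NUMBER to a proper -Exxx code. */
	return ret ? pcibios_err_to_errno(ret) : 0;
}
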
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index d4a02184784a..058bef1c7e41 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -823,7 +823,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -834,7 +834,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_CLKREQ, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x20;
pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch);
@@ -843,7 +843,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
*/
ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x01;
pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch);
pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73);
@@ -856,7 +856,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_INF_MOD, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x08;
pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch);
@@ -864,7 +864,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
@@ -875,7 +875,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -886,7 +886,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG0,
&scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 = ((scratch_32 & 0xFF000000) >> 24);
/* Check Whether subId is 0x11 or 0x12 */
@@ -898,7 +898,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG4,
&scratch_32);
if (ret)
- return ret;
+ goto read_fail;
/* Enable Base Clk setting change */
scratch_32 |= O2_SD_FREG4_ENABLE_CLK_SET;
@@ -921,7 +921,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_CLK_SETTING, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 &= ~(0xFF00);
scratch_32 |= 0x07E0C800;
@@ -931,14 +931,14 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_CLKREQ, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 |= 0x3;
pci_write_config_dword(chip->pdev, O2_SD_CLKREQ, scratch_32);
ret = pci_read_config_dword(chip->pdev,
O2_SD_PLL_SETTING, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 &= ~(0x1F3F070E);
scratch_32 |= 0x18270106;
@@ -949,7 +949,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_CAP_REG2, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 &= ~(0xE0);
pci_write_config_dword(chip->pdev,
O2_SD_CAP_REG2, scratch_32);
@@ -961,7 +961,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
@@ -971,7 +971,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -979,7 +979,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_PLL_SETTING, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
if ((scratch_32 & 0xff000000) == 0x01000000) {
scratch_32 &= 0x0000FFFF;
@@ -998,7 +998,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG4,
&scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 |= (1 << 22);
pci_write_config_dword(chip->pdev,
O2_SD_FUNC_REG4, scratch_32);
@@ -1017,7 +1017,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
@@ -1028,7 +1028,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
/* UnLock WP */
ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -1057,13 +1057,16 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
/* Lock WP */
ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
}
return 0;
+
+read_fail:
+ return pcibios_err_to_errno(ret);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 746f4cf7ab03..fbf7a91bed35 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2515,26 +2515,29 @@ EXPORT_SYMBOL_GPL(sdhci_get_cd_nogpio);
static int sdhci_check_ro(struct sdhci_host *host)
{
- unsigned long flags;
+ bool allow_invert = false;
int is_readonly;
- spin_lock_irqsave(&host->lock, flags);
-
- if (host->flags & SDHCI_DEVICE_DEAD)
+ if (host->flags & SDHCI_DEVICE_DEAD) {
is_readonly = 0;
- else if (host->ops->get_ro)
+ } else if (host->ops->get_ro) {
is_readonly = host->ops->get_ro(host);
- else if (mmc_can_gpio_ro(host->mmc))
+ } else if (mmc_can_gpio_ro(host->mmc)) {
is_readonly = mmc_gpio_get_ro(host->mmc);
- else
+ /* Do not invert twice */
+ allow_invert = !(host->mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH);
+ } else {
is_readonly = !(sdhci_readl(host, SDHCI_PRESENT_STATE)
& SDHCI_WRITE_PROTECT);
+ allow_invert = true;
+ }
- spin_unlock_irqrestore(&host->lock, flags);
+ if (is_readonly >= 0 &&
+ allow_invert &&
+ (host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT))
+ is_readonly = !is_readonly;
- /* This quirk needs to be replaced by a callback-function later */
- return host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT ?
- !is_readonly : is_readonly;
+ return is_readonly;
}
#define SAMPLE_COUNT 5
@@ -4724,6 +4727,21 @@ int sdhci_setup_host(struct sdhci_host *host)
if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) {
host->max_adma = 65532; /* 32-bit alignment */
mmc->max_seg_size = 65535;
+ /*
+ * sdhci_adma_table_pre() expects to define 1 DMA
+ * descriptor per segment, so the maximum segment size
+ * is set accordingly. SDHCI allows up to 64KiB per DMA
+ * descriptor (16-bit field), but some controllers do
+ * not support "zero means 65536" reducing the maximum
+ * for them to 65535. That is a problem if PAGE_SIZE is
+ * 64KiB because the block layer does not support
+ * max_seg_size < PAGE_SIZE, however
+ * sdhci_adma_table_pre() has a workaround to handle
+ * that case, and split the descriptor. Refer also
+ * comment in sdhci_adma_table_pre().
+ */
+ if (mmc->max_seg_size < PAGE_SIZE)
+ mmc->max_seg_size = PAGE_SIZE;
} else {
mmc->max_seg_size = 65536;
}
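
Worked numbers behind the new comment, for the 64KiB-page case it targets:

	/*
	 * Without "zero means 65536", one ADMA2 descriptor carries at
	 * most 65535 bytes. On a 64KiB-page kernel:
	 *
	 *   mmc->max_seg_size = 65535  <  PAGE_SIZE = 65536
	 *
	 * which violates the block layer's max_seg_size >= PAGE_SIZE
	 * rule. Raising max_seg_size to PAGE_SIZE satisfies the block
	 * layer; any segment that then exceeds 65535 bytes is split
	 * across two descriptors in sdhci_adma_table_pre().
	 */
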
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 3caa0717d46c..47ead84407cd 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -336,6 +336,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
lim.logical_block_size = tr->blksize;
if (tr->discard)
lim.max_hw_discard_sectors = UINT_MAX;
+ if (tr->flush)
+ lim.features |= BLK_FEAT_WRITE_CACHE;
/* Create gendisk */
gd = blk_mq_alloc_disk(new->tag_set, &lim, new);
@@ -372,13 +374,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
/* Create the request queue */
spin_lock_init(&new->queue_lock);
INIT_LIST_HEAD(&new->rq_list);
-
- if (tr->flush)
- blk_queue_write_cache(new->rq, true, false);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
-
gd->queue = new->rq;
if (new->readonly)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d7dbbd469b89..53e16d39af4b 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1093,28 +1093,32 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs,
unsigned int offset_in_page)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ bool ident_stage = !mtd->writesize;
- /* Make sure the offset is less than the actual page size. */
- if (offset_in_page > mtd->writesize + mtd->oobsize)
- return -EINVAL;
+ /* Bypass all checks during NAND identification */
+ if (likely(!ident_stage)) {
+ /* Make sure the offset is less than the actual page size. */
+ if (offset_in_page > mtd->writesize + mtd->oobsize)
+ return -EINVAL;
- /*
- * On small page NANDs, there's a dedicated command to access the OOB
- * area, and the column address is relative to the start of the OOB
- * area, not the start of the page. Asjust the address accordingly.
- */
- if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize)
- offset_in_page -= mtd->writesize;
+ /*
+ * On small page NANDs, there's a dedicated command to access the OOB
+ * area, and the column address is relative to the start of the OOB
+		 * area, not the start of the page. Adjust the address accordingly.
+ */
+ if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize)
+ offset_in_page -= mtd->writesize;
- /*
- * The offset in page is expressed in bytes, if the NAND bus is 16-bit
- * wide, then it must be divided by 2.
- */
- if (chip->options & NAND_BUSWIDTH_16) {
- if (WARN_ON(offset_in_page % 2))
- return -EINVAL;
+ /*
+ * The offset in page is expressed in bytes, if the NAND bus is 16-bit
+ * wide, then it must be divided by 2.
+ */
+ if (chip->options & NAND_BUSWIDTH_16) {
+ if (WARN_ON(offset_in_page % 2))
+ return -EINVAL;
- offset_in_page /= 2;
+ offset_in_page /= 2;
+ }
}
addrs[0] = offset_in_page;
@@ -1123,7 +1127,7 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs,
* Small page NANDs use 1 cycle for the columns, while large page NANDs
* need 2
*/
- if (mtd->writesize <= 512)
+ if (!ident_stage && mtd->writesize <= 512)
return 1;
addrs[1] = offset_in_page >> 8;
@@ -1436,16 +1440,19 @@ int nand_change_read_column_op(struct nand_chip *chip,
unsigned int len, bool force_8bit)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ bool ident_stage = !mtd->writesize;
if (len && !buf)
return -EINVAL;
- if (offset_in_page + len > mtd->writesize + mtd->oobsize)
- return -EINVAL;
+ if (!ident_stage) {
+ if (offset_in_page + len > mtd->writesize + mtd->oobsize)
+ return -EINVAL;
- /* Small page NANDs do not support column change. */
- if (mtd->writesize <= 512)
- return -ENOTSUPP;
+ /* Small page NANDs do not support column change. */
+ if (mtd->writesize <= 512)
+ return -ENOTSUPP;
+ }
if (nand_has_exec_op(chip)) {
const struct nand_interface_config *conf =
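
Both NAND hunks above key off the same signal: during identification the chip geometry is still unknown, so mtd->writesize is zero and any bounds check against it would reject the very reads that discover that geometry. The guard reduces to the following (an illustrative condensation of the hunks above):

	bool ident_stage = !mtd->writesize;	/* true until geometry is read */

	if (!ident_stage) {
		/* page/OOB bounds checks, small-page and 16-bit bus handling */
	}
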
@@ -2173,7 +2180,7 @@ EXPORT_SYMBOL_GPL(nand_reset_op);
int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
bool force_8bit, bool check_only)
{
- if (!len || !buf)
+ if (!len || (!check_only && !buf))
return -EINVAL;
if (nand_has_exec_op(chip)) {
@@ -6301,6 +6308,7 @@ static const struct nand_ops rawnand_ops = {
static int nand_scan_tail(struct nand_chip *chip)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ struct nand_device *base = &chip->base;
struct nand_ecc_ctrl *ecc = &chip->ecc;
int ret, i;
@@ -6445,9 +6453,13 @@ static int nand_scan_tail(struct nand_chip *chip)
if (!ecc->write_oob_raw)
ecc->write_oob_raw = ecc->write_oob;
- /* propagate ecc info to mtd_info */
+ /* Propagate ECC info to the generic NAND and MTD layers */
mtd->ecc_strength = ecc->strength;
+ if (!base->ecc.ctx.conf.strength)
+ base->ecc.ctx.conf.strength = ecc->strength;
mtd->ecc_step_size = ecc->size;
+ if (!base->ecc.ctx.conf.step_size)
+ base->ecc.ctx.conf.step_size = ecc->size;
/*
* Set the number of read / write steps for one page depending on ECC
@@ -6455,6 +6467,8 @@ static int nand_scan_tail(struct nand_chip *chip)
*/
if (!ecc->steps)
ecc->steps = mtd->writesize / ecc->size;
+ if (!base->ecc.ctx.nsteps)
+ base->ecc.ctx.nsteps = ecc->steps;
if (ecc->steps * ecc->size != mtd->writesize) {
WARN(1, "Invalid ECC parameters\n");
ret = -EINVAL;
diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c
index 7baaef69d70a..55580447633b 100644
--- a/drivers/mtd/nand/raw/rockchip-nand-controller.c
+++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c
@@ -420,13 +420,13 @@ static int rk_nfc_setup_interface(struct nand_chip *chip, int target,
u32 rate, tc2rw, trwpw, trw2c;
u32 temp;
- if (target < 0)
- return 0;
-
timings = nand_get_sdr_timings(conf);
if (IS_ERR(timings))
return -EOPNOTSUPP;
+ if (target < 0)
+ return 0;
+
if (IS_ERR(nfc->nfc_clk))
rate = clk_get_rate(nfc->ahb_clk);
else
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3c3fcce4acd4..d19aabf5d4fb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5773,6 +5773,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
if (real_dev) {
ret = ethtool_get_ts_info_by_layer(real_dev, info);
} else {
+ info->phc_index = -1;
+ info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
/* Check if all slaves support software tx timestamping */
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter) {
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 0cacd7027e35..bc80fb6397dc 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1214,9 +1214,9 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
__be32 target;
if (newval->string) {
- if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) {
- netdev_err(bond->dev, "invalid ARP target %pI4 specified\n",
- &target);
+ if (strlen(newval->string) < 1 ||
+ !in4_pton(newval->string + 1, -1, (u8 *)&target, -1, NULL)) {
+ netdev_err(bond->dev, "invalid ARP target specified\n");
return ret;
}
if (newval->string[0] == '+')
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 1d9057dc44f2..bf1589aef1fc 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1618,11 +1618,20 @@ static int mcp251xfd_open(struct net_device *ndev)
clear_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
can_rx_offload_enable(&priv->offload);
+ priv->wq = alloc_ordered_workqueue("%s-mcp251xfd_wq",
+ WQ_FREEZABLE | WQ_MEM_RECLAIM,
+ dev_name(&spi->dev));
+ if (!priv->wq) {
+ err = -ENOMEM;
+ goto out_can_rx_offload_disable;
+ }
+ INIT_WORK(&priv->tx_work, mcp251xfd_tx_obj_write_sync);
+
err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
IRQF_SHARED | IRQF_ONESHOT,
dev_name(&spi->dev), priv);
if (err)
- goto out_can_rx_offload_disable;
+ goto out_destroy_workqueue;
err = mcp251xfd_chip_interrupts_enable(priv);
if (err)
@@ -1634,6 +1643,8 @@ static int mcp251xfd_open(struct net_device *ndev)
out_free_irq:
free_irq(spi->irq, priv);
+ out_destroy_workqueue:
+ destroy_workqueue(priv->wq);
out_can_rx_offload_disable:
can_rx_offload_disable(&priv->offload);
set_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
@@ -1661,6 +1672,7 @@ static int mcp251xfd_stop(struct net_device *ndev)
hrtimer_cancel(&priv->tx_irq_timer);
mcp251xfd_chip_interrupts_disable(priv);
free_irq(ndev->irq, priv);
+ destroy_workqueue(priv->wq);
can_rx_offload_disable(&priv->offload);
mcp251xfd_timestamp_stop(priv);
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
index 160528d3cc26..b1de8052a45c 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
@@ -131,6 +131,39 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
tx_obj->xfer[0].len = len;
}
+static void mcp251xfd_tx_failure_drop(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_tx_ring *tx_ring,
+ int err)
+{
+ struct net_device *ndev = priv->ndev;
+ struct net_device_stats *stats = &ndev->stats;
+ unsigned int frame_len = 0;
+ u8 tx_head;
+
+ tx_ring->head--;
+ stats->tx_dropped++;
+ tx_head = mcp251xfd_get_tx_head(tx_ring);
+ can_free_echo_skb(ndev, tx_head, &frame_len);
+ netdev_completed_queue(ndev, 1, frame_len);
+ netif_wake_queue(ndev);
+
+ if (net_ratelimit())
+ netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+}
+
+void mcp251xfd_tx_obj_write_sync(struct work_struct *work)
+{
+ struct mcp251xfd_priv *priv = container_of(work, struct mcp251xfd_priv,
+ tx_work);
+ struct mcp251xfd_tx_obj *tx_obj = priv->tx_work_obj;
+ struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ int err;
+
+ err = spi_sync(priv->spi, &tx_obj->msg);
+ if (err)
+ mcp251xfd_tx_failure_drop(priv, tx_ring, err);
+}
+
static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv,
struct mcp251xfd_tx_obj *tx_obj)
{
@@ -162,6 +195,11 @@ static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv,
return false;
}
+static bool mcp251xfd_work_busy(struct work_struct *work)
+{
+ return work_busy(work);
+}
+
netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
struct net_device *ndev)
{
@@ -175,7 +213,8 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
if (can_dev_dropped_skb(ndev, skb))
return NETDEV_TX_OK;
- if (mcp251xfd_tx_busy(priv, tx_ring))
+ if (mcp251xfd_tx_busy(priv, tx_ring) ||
+ mcp251xfd_work_busy(&priv->tx_work))
return NETDEV_TX_BUSY;
tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
@@ -193,13 +232,13 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
netdev_sent_queue(priv->ndev, frame_len);
err = mcp251xfd_tx_obj_write(priv, tx_obj);
- if (err)
- goto out_err;
-
- return NETDEV_TX_OK;
-
- out_err:
- netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+ if (err == -EBUSY) {
+ netif_stop_queue(ndev);
+ priv->tx_work_obj = tx_obj;
+ queue_work(priv->wq, &priv->tx_work);
+ } else if (err) {
+ mcp251xfd_tx_failure_drop(priv, tx_ring, err);
+ }
return NETDEV_TX_OK;
}
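
Taken together, the mcp251xfd hunks keep the fast path unchanged: mcp251xfd_start_xmit() still issues the TX object with an asynchronous SPI transfer, and only the -EBUSY case (controller momentarily busy) is handed to the new ordered workqueue, where mcp251xfd_tx_obj_write_sync() retries the transfer with spi_sync() and, on failure, drops the frame through the shared mcp251xfd_tx_failure_drop() helper. Condensed flow, using only names from the hunks above:

	err = mcp251xfd_tx_obj_write(priv, tx_obj);	/* async SPI write */
	if (err == -EBUSY) {
		netif_stop_queue(ndev);			/* at most one deferred object */
		priv->tx_work_obj = tx_obj;
		queue_work(priv->wq, &priv->tx_work);	/* worker: spi_sync() retry */
	} else if (err) {
		mcp251xfd_tx_failure_drop(priv, tx_ring, err);
	}
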
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index 24510b3b8020..b35bfebd23f2 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -633,6 +633,10 @@ struct mcp251xfd_priv {
struct mcp251xfd_rx_ring *rx[MCP251XFD_FIFO_RX_NUM];
struct mcp251xfd_tx_ring tx[MCP251XFD_FIFO_TX_NUM];
+ struct workqueue_struct *wq;
+ struct work_struct tx_work;
+ struct mcp251xfd_tx_obj *tx_work_obj;
+
DECLARE_BITMAP(flags, __MCP251XFD_FLAGS_SIZE__);
u8 rx_ring_num;
@@ -952,6 +956,7 @@ void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
+void mcp251xfd_tx_obj_write_sync(struct work_struct *work);
netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
struct net_device *ndev);
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index 8faf8a462c05..024169461cad 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -125,6 +125,7 @@ static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_liste
static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = {
.quirks = 0,
+ .family = KVASER_LEAF,
.ops = &kvaser_usb_leaf_dev_ops,
};
@@ -294,7 +295,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd,
}
usb_free_urb(urb);
- return 0;
+ return err;
}
int kvaser_usb_can_rx_over_error(struct net_device *netdev)
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 02f07b870f10..268949939636 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1047,31 +1047,31 @@ static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset)
return ARRAY_SIZE(lan9303_mib);
}
-static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum)
+static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum)
{
struct lan9303 *chip = ds->priv;
int phy_base = chip->phy_addr_base;
- if (phy == phy_base)
+ if (port == 0)
return lan9303_virt_phy_reg_read(chip, regnum);
- if (phy > phy_base + 2)
+ if (port > 2)
return -ENODEV;
- return chip->ops->phy_read(chip, phy, regnum);
+ return chip->ops->phy_read(chip, phy_base + port, regnum);
}
-static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
+static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum,
u16 val)
{
struct lan9303 *chip = ds->priv;
int phy_base = chip->phy_addr_base;
- if (phy == phy_base)
+ if (port == 0)
return lan9303_virt_phy_reg_write(chip, regnum, val);
- if (phy > phy_base + 2)
+ if (port > 2)
return -ENODEV;
- return chip->ops->phy_write(chip, phy, regnum, val);
+ return chip->ops->phy_write(chip, phy_base + port, regnum, val);
}
static int lan9303_port_enable(struct dsa_switch *ds, int port,
@@ -1099,7 +1099,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port)
vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port);
lan9303_disable_processing_port(chip, port);
- lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN);
+ lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN);
}
static int lan9303_port_bridge_join(struct dsa_switch *ds, int port,
@@ -1374,8 +1374,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = {
static int lan9303_register_switch(struct lan9303 *chip)
{
- int base;
-
chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL);
if (!chip->ds)
return -ENOMEM;
@@ -1385,8 +1383,7 @@ static int lan9303_register_switch(struct lan9303 *chip)
chip->ds->priv = chip;
chip->ds->ops = &lan9303_switch_ops;
chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops;
- base = chip->phy_addr_base;
- chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base);
+ chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0);
return dsa_register_switch(chip->ds);
}
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index f8ad7833f5d9..425e20daf1e9 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -355,10 +355,8 @@ int ksz9477_reset_switch(struct ksz_device *dev)
SPI_AUTO_EDGE_DETECTION, 0);
/* default configuration */
- ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
- data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING |
- SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE;
- ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+ ksz_write8(dev, REG_SW_LUE_CTRL_1,
+ SW_AGING_ENABLE | SW_LINK_AUTO_AGING | SW_SRC_ADDR_FILTER);
/* disable interrupts */
ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK);
@@ -429,6 +427,57 @@ void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze)
mutex_unlock(&p->mib.cnt_mutex);
}
+int ksz9477_errata_monitor(struct ksz_device *dev, int port,
+ u64 tx_late_col)
+{
+ u32 pmavbc;
+ u8 status;
+ u16 pqm;
+ int ret;
+
+ ret = ksz_pread8(dev, port, REG_PORT_STATUS_0, &status);
+ if (ret)
+ return ret;
+	if (FIELD_GET(PORT_INTF_SPEED_MASK, status) != PORT_INTF_SPEED_NONE &&
+	    !(status & PORT_INTF_FULL_DUPLEX)) {
+ /* Errata DS80000754 recommends monitoring potential faults in
+ * half-duplex mode. The switch might not be able to communicate anymore
+		 * half-duplex mode. The switch might not be able to communicate
+		 * anymore in these states. If you see this message, please read
+		 * the errata sheet for more information:
+		 * https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9477S-Errata-DS80000754.pdf
+		 * To work around this issue, half-duplex mode should be avoided.
+ * A software reset could be implemented to recover from this state.
+ */
+ dev_warn_once(dev->dev,
+ "Half-duplex detected on port %d, transmission halt may occur\n",
+ port);
+ if (tx_late_col != 0) {
+ /* Transmission halt with late collisions */
+ dev_crit_once(dev->dev,
+ "TX late collisions detected, transmission may be halted on port %d\n",
+ port);
+ }
+ ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &status);
+ if (ret)
+ return ret;
+ if (status & SW_VLAN_ENABLE) {
+ ret = ksz_pread16(dev, port, REG_PORT_QM_TX_CNT_0__4, &pqm);
+ if (ret)
+ return ret;
+ ret = ksz_read32(dev, REG_PMAVBC, &pmavbc);
+ if (ret)
+ return ret;
+ if ((FIELD_GET(PMAVBC_MASK, pmavbc) <= PMAVBC_MIN) ||
+ (FIELD_GET(PORT_QM_TX_CNT_M, pqm) >= PORT_QM_TX_CNT_MAX)) {
+ /* Transmission halt with Half-Duplex and VLAN */
+ dev_crit_once(dev->dev,
+ "resources out of limits, transmission may be halted\n");
+ }
+ }
+ }
+ return ret;
+}
+
void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
{
struct ksz_port_mib *mib = &dev->ports[port].mib;
@@ -1299,6 +1348,10 @@ int ksz9477_setup(struct dsa_switch *ds)
/* Enable REG_SW_MTU__2 reg by setting SW_JUMBO_PACKET */
ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_JUMBO_PACKET, true);
+ /* Use collision based back pressure mode. */
+ ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_BACK_PRESSURE,
+ SW_BACK_PRESSURE_COLLISION);
+
/* Now we can configure default MTU value */
ret = regmap_update_bits(ksz_regmap_16(dev), REG_SW_MTU__2, REG_SW_MTU_MASK,
VLAN_ETH_FRAME_LEN + ETH_FCS_LEN);
diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h
index ce1e656b800b..239a281da10b 100644
--- a/drivers/net/dsa/microchip/ksz9477.h
+++ b/drivers/net/dsa/microchip/ksz9477.h
@@ -36,6 +36,8 @@ int ksz9477_port_mirror_add(struct ksz_device *dev, int port,
bool ingress, struct netlink_ext_ack *extack);
void ksz9477_port_mirror_del(struct ksz_device *dev, int port,
struct dsa_mall_mirror_tc_entry *mirror);
+int ksz9477_errata_monitor(struct ksz_device *dev, int port,
+ u64 tx_late_col);
void ksz9477_get_caps(struct ksz_device *dev, int port,
struct phylink_config *config);
int ksz9477_fdb_dump(struct ksz_device *dev, int port,
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index f3a205ee483f..d5354c600ea1 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -247,6 +247,7 @@
#define REG_SW_MAC_CTRL_1 0x0331
#define SW_BACK_PRESSURE BIT(5)
+#define SW_BACK_PRESSURE_COLLISION 0
#define FAIR_FLOW_CTRL BIT(4)
#define NO_EXC_COLLISION_DROP BIT(3)
#define SW_JUMBO_PACKET BIT(2)
@@ -842,8 +843,8 @@
#define REG_PORT_STATUS_0 0x0030
-#define PORT_INTF_SPEED_M 0x3
-#define PORT_INTF_SPEED_S 3
+#define PORT_INTF_SPEED_MASK GENMASK(4, 3)
+#define PORT_INTF_SPEED_NONE GENMASK(1, 0)
#define PORT_INTF_FULL_DUPLEX BIT(2)
#define PORT_TX_FLOW_CTRL BIT(1)
#define PORT_RX_FLOW_CTRL BIT(0)
@@ -1167,6 +1168,11 @@
#define PORT_RMII_CLK_SEL BIT(7)
#define PORT_MII_SEL_EDGE BIT(5)
+#define REG_PMAVBC 0x03AC
+
+#define PMAVBC_MASK GENMASK(26, 16)
+#define PMAVBC_MIN 0x580
+
/* 4 - MAC */
#define REG_PORT_MAC_CTRL_0 0x0400
@@ -1494,6 +1500,7 @@
#define PORT_QM_TX_CNT_USED_S 0
#define PORT_QM_TX_CNT_M (BIT(11) - 1)
+#define PORT_QM_TX_CNT_MAX 0x200
#define REG_PORT_QM_TX_CNT_1__4 0x0A14
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 2818e24e2a51..0580b2fee21c 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1382,6 +1382,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.tc_cbs_supported = true,
.ops = &ksz9477_dev_ops,
.phylink_mac_ops = &ksz9477_phylink_mac_ops,
+ .phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1416,6 +1417,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_ipms = 8,
.ops = &ksz9477_dev_ops,
.phylink_mac_ops = &ksz9477_phylink_mac_ops,
+ .phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1450,6 +1452,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_ipms = 8,
.ops = &ksz9477_dev_ops,
.phylink_mac_ops = &ksz9477_phylink_mac_ops,
+ .phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1540,6 +1543,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.tc_cbs_supported = true,
.ops = &ksz9477_dev_ops,
.phylink_mac_ops = &ksz9477_phylink_mac_ops,
+ .phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
@@ -1820,6 +1824,7 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port)
struct rtnl_link_stats64 *stats;
struct ksz_stats_raw *raw;
struct ksz_port_mib *mib;
+ int ret;
mib = &dev->ports[port].mib;
stats = &mib->stats64;
@@ -1861,6 +1866,12 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port)
pstats->rx_pause_frames = raw->rx_pause;
spin_unlock(&mib->stats64_lock);
+
+ if (dev->info->phy_errata_9477) {
+ ret = ksz9477_errata_monitor(dev, port, raw->tx_late_col);
+ if (ret)
+ dev_err(dev->dev, "Failed to monitor transmission halt\n");
+ }
}
void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port)
@@ -2185,7 +2196,7 @@ static void ksz_irq_bus_sync_unlock(struct irq_data *d)
struct ksz_device *dev = kirq->dev;
int ret;
- ret = ksz_write32(dev, kirq->reg_mask, kirq->masked);
+ ret = ksz_write8(dev, kirq->reg_mask, kirq->masked);
if (ret)
dev_err(dev->dev, "failed to change IRQ mask\n");
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index c784fd23a993..ee7db46e469d 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -66,6 +66,7 @@ struct ksz_chip_data {
bool tc_cbs_supported;
const struct ksz_dev_ops *ops;
const struct phylink_mac_ops *phylink_mac_ops;
+ bool phy_errata_9477;
bool ksz87xx_eee_link_erratum;
const struct ksz_mib_names *mib_names;
int mib_cnt;
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index a806dadc4196..20c6529ec135 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1380,6 +1380,7 @@ static int bcmasp_probe(struct platform_device *pdev)
dev_err(dev, "Cannot create eth interface %d\n", i);
bcmasp_remove_intfs(priv);
of_node_put(intf_node);
+ ret = -ENOMEM;
goto of_put_exit;
}
list_add_tail(&intf->list, &priv->intfs);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index e2a4e1088b7f..9580ab83d387 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1262,7 +1262,7 @@ enum {
struct bnx2x_fw_stats_req {
struct stats_query_header hdr;
- struct stats_query_entry query[FP_SB_MAX_E1x+
+ struct stats_query_entry query[FP_SB_MAX_E2 +
BNX2X_FIRST_QUEUE_QUERY_IDX];
};
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c437ca1c0fd3..43952689bfb0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -732,9 +732,6 @@ tx_done:
return NETDEV_TX_OK;
tx_dma_error:
- if (BNXT_TX_PTP_IS_SET(lflags))
- atomic_inc(&bp->ptp_cfg->tx_avail);
-
last_frag = i;
/* start back at beginning and unmap skb */
@@ -756,6 +753,8 @@ tx_dma_error:
tx_free:
dev_kfree_skb_any(skb);
tx_kick_pending:
+ if (BNXT_TX_PTP_IS_SET(lflags))
+ atomic_inc(&bp->ptp_cfg->tx_avail);
if (txr->kick_pending)
bnxt_txr_db_kick(bp, txr, txr->tx_prod);
txr->tx_buf_ring[txr->tx_prod].skb = NULL;
@@ -6147,6 +6146,24 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp)
return max_ring;
}
+u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp)
+{
+ u16 i, tbl_size, max_ring = 0;
+ struct bnxt_rss_ctx *rss_ctx;
+
+ if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp))
+ return 0;
+
+ tbl_size = bnxt_get_rxfh_indir_size(bp->dev);
+
+ list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) {
+ for (i = 0; i < tbl_size; i++)
+ max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]);
+ }
+
+ return max_ring;
+}
+
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings)
{
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
@@ -8996,6 +9013,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN);
#endif
}
+ bp->tso_max_segs = le16_to_cpu(resp->max_tso_segs);
hwrm_func_qcaps_exit:
hwrm_req_drop(bp, req);
@@ -12669,7 +12687,11 @@ bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx)
if (!BNXT_NEW_RM(bp))
return true;
- if (hwr.vnic == bp->hw_resc.resv_vnics &&
+ /* Do not reduce VNIC and RSS ctx reservations. There is a FW
+ * issue that will mess up the default VNIC if we reduce the
+ * reservations.
+ */
+ if (hwr.vnic <= bp->hw_resc.resv_vnics &&
hwr.rss_ctx <= bp->hw_resc.resv_rsscos_ctxs)
return true;
@@ -15363,6 +15385,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->priv_flags |= IFF_UNICAST_FLT;
netif_set_tso_max_size(dev, GSO_MAX_SIZE);
+ if (bp->tso_max_segs)
+ netif_set_tso_max_segs(dev, bp->tso_max_segs);
dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_RX_SG;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index bbc7edccd5a4..6b10a09ee1af 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2318,6 +2318,7 @@ struct bnxt {
u8 rss_hash_key_updated:1;
u16 max_mtu;
+ u16 tso_max_segs;
u8 max_tc;
u8 max_lltc; /* lossless TCs */
struct bnxt_queue_info q_info[BNXT_MAX_QUEUE];
@@ -2775,6 +2776,7 @@ int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic,
void bnxt_fill_ipv6_mask(__be32 mask[4]);
int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx);
void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx);
+u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp);
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings);
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic);
int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 8763f8a01457..79c09c1cdf93 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -961,6 +961,12 @@ static int bnxt_set_channels(struct net_device *dev,
return rc;
}
+ if (req_rx_rings < bp->rx_nr_rings &&
+ req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) {
+ netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n");
+ return -EINVAL;
+ }
+
if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
netif_is_rxfh_configured(dev)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 06ea86c80be1..f219709f9563 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -2,7 +2,7 @@
*
* Copyright (c) 2014-2016 Broadcom Corporation
* Copyright (c) 2014-2018 Broadcom Limited
- * Copyright (c) 2018-2023 Broadcom Inc.
+ * Copyright (c) 2018-2024 Broadcom Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -500,7 +500,11 @@ struct cmd_nums {
#define HWRM_TFC_IF_TBL_GET 0x399UL
#define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL
#define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL
+ #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x39cUL
+ #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x39dUL
+ #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x39eUL
#define HWRM_SV 0x400UL
+ #define HWRM_DBG_LOG_BUFFER_FLUSH 0xff0fUL
#define HWRM_DBG_READ_DIRECT 0xff10UL
#define HWRM_DBG_READ_INDIRECT 0xff11UL
#define HWRM_DBG_WRITE_DIRECT 0xff12UL
@@ -609,8 +613,8 @@ struct hwrm_err_output {
#define HWRM_VERSION_MAJOR 1
#define HWRM_VERSION_MINOR 10
#define HWRM_VERSION_UPDATE 3
-#define HWRM_VERSION_RSVD 39
-#define HWRM_VERSION_STR "1.10.3.39"
+#define HWRM_VERSION_RSVD 44
+#define HWRM_VERSION_STR "1.10.3.44"
/* hwrm_ver_get_input (size:192b/24B) */
struct hwrm_ver_get_input {
@@ -664,6 +668,7 @@ struct hwrm_ver_get_output {
#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL
#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL
#define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL
+ #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL
u8 roce_fw_maj_8b;
u8 roce_fw_min_8b;
u8 roce_fw_bld_8b;
@@ -843,7 +848,9 @@ struct hwrm_async_event_cmpl {
#define ASYNC_EVENT_CMPL_EVENT_ID_HW_DOORBELL_RECOVERY_READ_ERROR 0x49UL
#define ASYNC_EVENT_CMPL_EVENT_ID_CTX_ERROR 0x4aUL
#define ASYNC_EVENT_CMPL_EVENT_ID_UDCC_SESSION_CHANGE 0x4bUL
- #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4cUL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER 0x4cUL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_PEER_MMAP_CHANGE 0x4dUL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4eUL
#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL
#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL
#define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1326,13 +1333,13 @@ struct hwrm_async_event_cmpl_error_report_base {
u8 timestamp_lo;
__le16 timestamp_hi;
__le32 event_data1;
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL
#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL
#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL
#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED
@@ -1814,6 +1821,9 @@ struct hwrm_func_qcaps_output {
#define FUNC_QCAPS_RESP_FLAGS_EXT2_SW_MAX_RESOURCE_LIMITS_SUPPORTED 0x800000UL
#define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_INGRESS_NIC_FLOW_SUPPORTED 0x1000000UL
#define FUNC_QCAPS_RESP_FLAGS_EXT2_LPBK_STATS_SUPPORTED 0x2000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_EGRESS_NIC_FLOW_SUPPORTED 0x4000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT2_MULTI_LOSSLESS_QUEUES_SUPPORTED 0x8000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT2_PEER_MMAP_SUPPORTED 0x10000000UL
__le16 tunnel_disable_flag;
#define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN 0x1UL
#define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NGE 0x2UL
@@ -1828,7 +1838,7 @@ struct hwrm_func_qcaps_output {
#define FUNC_QCAPS_RESP_XID_PARTITION_CAP_RX_CK 0x2UL
u8 device_serial_number[8];
__le16 ctxs_per_partition;
- u8 unused_2[2];
+ __le16 max_tso_segs;
__le32 roce_vf_max_av;
__le32 roce_vf_max_cq;
__le32 roce_vf_max_mrw;
@@ -2449,6 +2459,7 @@ struct hwrm_func_drv_rgtr_input {
#define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL
#define FUNC_DRV_RGTR_REQ_FLAGS_ASYM_QUEUE_CFG_SUPPORT 0x400UL
#define FUNC_DRV_RGTR_REQ_FLAGS_TF_INGRESS_NIC_FLOW_MODE 0x800UL
+ #define FUNC_DRV_RGTR_REQ_FLAGS_TF_EGRESS_NIC_FLOW_MODE 0x1000UL
__le32 enables;
#define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL
#define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL
@@ -3660,22 +3671,24 @@ struct hwrm_func_backing_store_cfg_v2_input {
__le16 target_id;
__le64 resp_addr;
__le16 type;
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL
- #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TX_CK 0x13UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RX_CK 0x14UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL
@@ -3772,18 +3785,20 @@ struct hwrm_func_backing_store_qcfg_v2_output {
__le16 seq_id;
__le16 resp_len;
__le16 type;
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL
- #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TX_CK 0x13UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RX_CK 0x14UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL
@@ -3876,22 +3891,24 @@ struct hwrm_func_backing_store_qcaps_v2_input {
__le16 target_id;
__le64 resp_addr;
__le16 type;
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL
@@ -3911,22 +3928,24 @@ struct hwrm_func_backing_store_qcaps_v2_output {
__le16 seq_id;
__le16 resp_len;
__le16 type;
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TX_CK 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RX_CK 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL
@@ -4202,7 +4221,8 @@ struct hwrm_port_phy_cfg_input {
#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_112 0x3eaUL
#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_112 0x7d2UL
#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 0xfa2UL
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 0x1f42UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112
__le16 auto_link_speeds2_mask;
#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_1GB 0x1UL
#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_10GB 0x2UL
@@ -4217,6 +4237,7 @@ struct hwrm_port_phy_cfg_input {
#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_100GB_PAM4_112 0x400UL
#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_200GB_PAM4_112 0x800UL
#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_400GB_PAM4_112 0x1000UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_800GB_PAM4_112 0x2000UL
u8 unused_2[6];
};
@@ -4292,6 +4313,7 @@ struct hwrm_port_phy_qcfg_output {
#define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL
#define PORT_PHY_QCFG_RESP_LINK_SPEED_200GB 0x7d0UL
#define PORT_PHY_QCFG_RESP_LINK_SPEED_400GB 0xfa0UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_800GB 0x1f40UL
#define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL
#define PORT_PHY_QCFG_RESP_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_LINK_SPEED_10MB
u8 duplex_cfg;
@@ -4451,7 +4473,13 @@ struct hwrm_port_phy_qcfg_output {
#define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR4 0x35UL
#define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR4 0x36UL
#define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4 0x37UL
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASECR8 0x38UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASESR8 0x39UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASELR8 0x3aUL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEER8 0x3bUL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEFR8 0x3cUL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 0x3dUL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8
u8 media_type;
#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL
#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL
@@ -5049,33 +5077,43 @@ struct hwrm_port_qstats_ext_output {
u8 valid;
};
-/* hwrm_port_lpbk_qstats_input (size:128b/16B) */
+/* hwrm_port_lpbk_qstats_input (size:256b/32B) */
struct hwrm_port_lpbk_qstats_input {
__le16 req_type;
__le16 cmpl_ring;
__le16 seq_id;
__le16 target_id;
__le64 resp_addr;
+ __le16 lpbk_stat_size;
+ u8 flags;
+ #define PORT_LPBK_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL
+ u8 unused_0[5];
+ __le64 lpbk_stat_host_addr;
};
-/* hwrm_port_lpbk_qstats_output (size:768b/96B) */
+/* hwrm_port_lpbk_qstats_output (size:128b/16B) */
struct hwrm_port_lpbk_qstats_output {
__le16 error_code;
__le16 req_type;
__le16 seq_id;
__le16 resp_len;
+ __le16 lpbk_stat_size;
+ u8 unused_0[5];
+ u8 valid;
+};
+
+/* port_lpbk_stats (size:640b/80B) */
+struct port_lpbk_stats {
__le64 lpbk_ucast_frames;
__le64 lpbk_mcast_frames;
__le64 lpbk_bcast_frames;
__le64 lpbk_ucast_bytes;
__le64 lpbk_mcast_bytes;
__le64 lpbk_bcast_bytes;
- __le64 tx_stat_discard;
- __le64 tx_stat_error;
- __le64 rx_stat_discard;
- __le64 rx_stat_error;
- u8 unused_0[7];
- u8 valid;
+ __le64 lpbk_tx_discards;
+ __le64 lpbk_tx_errors;
+ __le64 lpbk_rx_discards;
+ __le64 lpbk_rx_errors;
};
/* hwrm_port_ecn_qstats_input (size:256b/32B) */
@@ -5140,13 +5178,15 @@ struct hwrm_port_clr_stats_output {
u8 valid;
};
-/* hwrm_port_lpbk_clr_stats_input (size:128b/16B) */
+/* hwrm_port_lpbk_clr_stats_input (size:192b/24B) */
struct hwrm_port_lpbk_clr_stats_input {
__le16 req_type;
__le16 cmpl_ring;
__le16 seq_id;
__le16 target_id;
__le64 resp_addr;
+ __le16 port_id;
+ u8 unused_0[6];
};
/* hwrm_port_lpbk_clr_stats_output (size:128b/16B) */
@@ -5287,10 +5327,11 @@ struct hwrm_port_phy_qcaps_output {
#define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_100G 0x2UL
#define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_200G 0x4UL
__le16 flags2;
- #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL
- #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL
- #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL
- #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL
+ #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL
+ #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL
+ #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL
+ #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL
+ #define PORT_PHY_QCAPS_RESP_FLAGS2_REMOTE_LPBK_UNSUPPORTED 0x10UL
u8 internal_port_cnt;
u8 unused_0;
__le16 supported_speeds2_force_mode;
@@ -7443,17 +7484,17 @@ struct hwrm_cfa_l2_filter_cfg_input {
__le16 target_id;
__le64 resp_addr;
__le32 flags;
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE
#define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL
#define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4
#define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4)
@@ -8520,17 +8561,17 @@ struct hwrm_tunnel_dst_port_query_input {
__le16 target_id;
__le64 resp_addr;
u8 tunnel_type;
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL
#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL
#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL
#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL
@@ -8576,17 +8617,17 @@ struct hwrm_tunnel_dst_port_alloc_input {
__le16 target_id;
__le64 resp_addr;
u8 tunnel_type;
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL
#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL
#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL
#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL
@@ -8635,17 +8676,17 @@ struct hwrm_tunnel_dst_port_free_input {
__le16 target_id;
__le64 resp_addr;
u8 tunnel_type;
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL
+ #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL
#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL
#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL
#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL
@@ -9109,6 +9150,7 @@ struct hwrm_struct_hdr {
#define STRUCT_HDR_STRUCT_ID_LLDP_GENERIC 0x424UL
#define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE 0x426UL
#define STRUCT_HDR_STRUCT_ID_POWER_BKUP 0x427UL
+ #define STRUCT_HDR_STRUCT_ID_PEER_MMAP 0x429UL
#define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL
#define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL
#define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL
@@ -9758,6 +9800,9 @@ struct hwrm_dbg_coredump_initiate_input {
__le16 instance;
__le16 unused_0;
u8 seg_flags;
+ #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_LIVE_DATA 0x1UL
+ #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_CRASH_DATA 0x2UL
+ #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_COLLECT_CTX_L1_CACHE 0x4UL
u8 unused_1[7];
};
@@ -10433,13 +10478,13 @@ struct hwrm_selftest_irq_output {
/* dbc_dbc (size:64b/8B) */
struct dbc_dbc {
- u32 index;
+ __le32 index;
#define DBC_DBC_INDEX_MASK 0xffffffUL
#define DBC_DBC_INDEX_SFT 0
#define DBC_DBC_EPOCH 0x1000000UL
#define DBC_DBC_TOGGLE_MASK 0x6000000UL
#define DBC_DBC_TOGGLE_SFT 25
- u32 type_path_xid;
+ __le32 type_path_xid;
#define DBC_DBC_XID_MASK 0xfffffUL
#define DBC_DBC_XID_SFT 0
#define DBC_DBC_PATH_MASK 0x3000000UL
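
The dbc_dbc change is a pure annotation fix: the doorbell words are little-endian on the bus, so the fields become __le32 rather than u32, which lets sparse flag any store that skips cpu_to_le32(). A small userspace illustration of why the byte-order conversion matters (a portable sketch, not driver code):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Userspace analogue of cpu_to_le32(): lay the bytes out in
 * little-endian order regardless of the host byte order.
 */
static uint32_t cpu_to_le32_portable(uint32_t v)
{
	uint8_t b[4] = { (uint8_t)v, (uint8_t)(v >> 8),
			 (uint8_t)(v >> 16), (uint8_t)(v >> 24) };
	uint32_t le;

	memcpy(&le, b, 4);	/* bytes now sit in LE order in memory */
	return le;
}

int main(void)
{
	uint32_t le = cpu_to_le32_portable(0x12345678);
	uint8_t first;

	memcpy(&first, &le, 1);
	assert(first == 0x78);	/* least-significant byte first on any host */
	return 0;
}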
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5e9a93bdb518..23ebeb143987 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2482,6 +2482,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
(tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[bufidx];
+
+ /* Sanity check the free map to make sure it points to an index
+ * that is not occupied by another skb. If skb memory is not
+ * freed, congestion control kicks in and halts tx.
+ */
+ if (unlikely(tx_buff->skb)) {
+ dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
+ skb_is_gso(skb) ? "tso_pool" : "tx_pool",
+ queue_num, bufidx);
+ dev_kfree_skb_any(tx_buff->skb);
+ }
+
tx_buff->skb = skb;
tx_buff->index = bufidx;
tx_buff->pool_index = queue_num;
@@ -4061,6 +4073,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
adapter->num_active_tx_scrqs = 0;
}
+ /* Clean any remaining outstanding SKBs; we freed the IRQ, so
+ * we won't be hearing from them.
+ */
+ clean_tx_pools(adapter);
+
if (adapter->rx_scrq) {
for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
if (!adapter->rx_scrq[i])
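
The two ibmvnic hunks work together: the xmit path now detects a free-map slot that still tracks an skb and frees it with a rate-limited warning, and release_sub_crqs() drains the TX pools once the IRQs are gone so no late completion can leak a buffer. A hedged userspace sketch of the slot check, with invented types in place of the driver's pools:

#include <stdio.h>
#include <stdlib.h>

/* Invented stand-ins for the driver's TX buffer bookkeeping. */
struct tx_buff {
	void *skb;	/* non-NULL means the slot is still in use */
	int index;
};

/* Claim the slot the free map hands out; if it unexpectedly still
 * tracks a buffer, warn and free it so the pool cannot leak and
 * eventually stall TX.
 */
static struct tx_buff *claim_tx_slot(struct tx_buff *pool, int idx)
{
	struct tx_buff *buf = &pool[idx];

	if (buf->skb) {
		fprintf(stderr, "free map points to untracked skb (idx=%d)\n",
			idx);
		free(buf->skb);
		buf->skb = NULL;
	}
	return buf;
}

int main(void)
{
	struct tx_buff pool[2] = { { malloc(1), 0 }, { NULL, 1 } };

	claim_tx_slot(pool, 0);		/* stale slot: warns and frees */
	claim_tx_slot(pool, 1);		/* clean slot: silent */
	return pool[0].skb == NULL ? 0 : 1;
}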
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 2e98a2a0bead..ce227b56cf72 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1109,6 +1109,46 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
}
/**
+ * e1000e_force_smbus - Force interfaces to transition to SMBUS mode.
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already
+ * acquired.
+ *
+ * Return: 0 on success, negative errno on failure.
+ **/
+static s32 e1000e_force_smbus(struct e1000_hw *hw)
+{
+ u16 smb_ctrl = 0;
+ u32 ctrl_ext;
+ s32 ret_val;
+
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ /* Force SMBus mode in the PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl);
+ if (ret_val) {
+ e1000e_enable_phy_retry(hw);
+ return ret_val;
+ }
+
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in the MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
+
+ return 0;
+}
+
+/**
* e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP
* @hw: pointer to the HW structure
* @to_sx: boolean indicating a system power state transition to Sx
@@ -1165,6 +1205,14 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;
+ if (hw->mac.type != e1000_pch_mtp) {
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val) {
+ e_dbg("Failed to force SMBUS: %d\n", ret_val);
+ goto release;
+ }
+ }
+
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
@@ -1225,27 +1273,12 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
}
release:
- /* Switching PHY interface always returns MDI error
- * so disable retry mechanism to avoid wasting time
- */
- e1000e_disable_phy_retry(hw);
-
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val) {
- e1000e_enable_phy_retry(hw);
- hw->phy.ops.release(hw);
- goto out;
+ if (hw->mac.type == e1000_pch_mtp) {
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val)
+ e_dbg("Failed to force SMBUS over MTL system: %d\n",
+ ret_val);
}
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- e1000e_enable_phy_retry(hw);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
hw->phy.ops.release(hw);
out:
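
The net effect of the ich8lan.c refactor is a single e1000e_force_smbus() helper plus a change of ordering: pre-MTP parts force SMBus before the ULP configuration, MTP parts only afterwards, where a failure is merely logged. A minimal standalone sketch of that control flow, with stubbed helpers standing in for the driver calls:

#include <stdbool.h>
#include <stdio.h>

struct hw { int id; };	/* illustrative device handle, not the driver's */

static int force_smbus(struct hw *hw)    { (void)hw; return 0; }	/* stub */
static int configure_ulp(struct hw *hw)  { (void)hw; return 0; }	/* stub */

static int enter_ulp(struct hw *hw, bool is_mtp)
{
	int ret;

	if (!is_mtp) {
		/* Older parts: switch to SMBus before touching ULP. */
		ret = force_smbus(hw);
		if (ret)
			return ret;
	}

	ret = configure_ulp(hw);

	/* MTP parts: the late switch is best-effort, mirroring the
	 * e_dbg()-only error path in the hunk above.
	 */
	if (is_mtp && force_smbus(hw))
		fprintf(stderr, "failed to force SMBus on MTL\n");

	return ret;
}

int main(void)
{
	struct hw hw = { 0 };

	return enter_ulp(&hw, true);
}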
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index da5c59daf8ba..3cd161c6672b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6363,49 +6363,49 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
ew32(EXTCNF_CTRL, mac_data);
- /* Enable the Dynamic Power Gating in the MAC */
- mac_data = er32(FEXTNVM7);
- mac_data |= BIT(22);
- ew32(FEXTNVM7, mac_data);
-
/* Disable disconnected cable conditioning for Power Gating */
mac_data = er32(DPGFR);
mac_data |= BIT(2);
ew32(DPGFR, mac_data);
- /* Don't wake from dynamic Power Gating with clock request */
- mac_data = er32(FEXTNVM12);
- mac_data |= BIT(12);
- ew32(FEXTNVM12, mac_data);
-
- /* Ungate PGCB clock */
- mac_data = er32(FEXTNVM9);
- mac_data &= ~BIT(28);
- ew32(FEXTNVM9, mac_data);
-
- /* Enable K1 off to enable mPHY Power Gating */
- mac_data = er32(FEXTNVM6);
- mac_data |= BIT(31);
- ew32(FEXTNVM6, mac_data);
-
- /* Enable mPHY power gating for any link and speed */
- mac_data = er32(FEXTNVM8);
- mac_data |= BIT(9);
- ew32(FEXTNVM8, mac_data);
-
/* Enable the Dynamic Clock Gating in the DMA and MAC */
mac_data = er32(CTRL_EXT);
mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
ew32(CTRL_EXT, mac_data);
-
- /* No MAC DPG gating SLP_S0 in modern standby
- * Switch the logic of the lanphypc to use PMC counter
- */
- mac_data = er32(FEXTNVM5);
- mac_data |= BIT(7);
- ew32(FEXTNVM5, mac_data);
}
+ /* Enable the Dynamic Power Gating in the MAC */
+ mac_data = er32(FEXTNVM7);
+ mac_data |= BIT(22);
+ ew32(FEXTNVM7, mac_data);
+
+ /* Don't wake from dynamic Power Gating with clock request */
+ mac_data = er32(FEXTNVM12);
+ mac_data |= BIT(12);
+ ew32(FEXTNVM12, mac_data);
+
+ /* Ungate PGCB clock */
+ mac_data = er32(FEXTNVM9);
+ mac_data &= ~BIT(28);
+ ew32(FEXTNVM9, mac_data);
+
+ /* Enable K1 off to enable mPHY Power Gating */
+ mac_data = er32(FEXTNVM6);
+ mac_data |= BIT(31);
+ ew32(FEXTNVM6, mac_data);
+
+ /* Enable mPHY power gating for any link and speed */
+ mac_data = er32(FEXTNVM8);
+ mac_data |= BIT(9);
+ ew32(FEXTNVM8, mac_data);
+
+ /* No MAC DPG gating SLP_S0 in modern standby
+ * Switch the logic of the lanphypc to use PMC counter
+ */
+ mac_data = er32(FEXTNVM5);
+ mac_data |= BIT(7);
+ ew32(FEXTNVM5, mac_data);
+
/* Disable the time synchronization clock */
mac_data = er32(FEXTNVM7);
mac_data |= BIT(31);
@@ -6498,33 +6498,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
} else {
/* Request driver unconfigure the device from S0ix */
- /* Disable the Dynamic Power Gating in the MAC */
- mac_data = er32(FEXTNVM7);
- mac_data &= 0xFFBFFFFF;
- ew32(FEXTNVM7, mac_data);
-
- /* Disable mPHY power gating for any link and speed */
- mac_data = er32(FEXTNVM8);
- mac_data &= ~BIT(9);
- ew32(FEXTNVM8, mac_data);
-
- /* Disable K1 off */
- mac_data = er32(FEXTNVM6);
- mac_data &= ~BIT(31);
- ew32(FEXTNVM6, mac_data);
-
- /* Disable Ungate PGCB clock */
- mac_data = er32(FEXTNVM9);
- mac_data |= BIT(28);
- ew32(FEXTNVM9, mac_data);
-
- /* Cancel not waking from dynamic
- * Power Gating with clock request
- */
- mac_data = er32(FEXTNVM12);
- mac_data &= ~BIT(12);
- ew32(FEXTNVM12, mac_data);
-
/* Cancel disable disconnected cable conditioning
* for Power Gating
*/
@@ -6537,13 +6510,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
mac_data &= 0xFFF7FFFF;
ew32(CTRL_EXT, mac_data);
- /* Revert the lanphypc logic to use the internal Gbe counter
- * and not the PMC counter
- */
- mac_data = er32(FEXTNVM5);
- mac_data &= 0xFFFFFF7F;
- ew32(FEXTNVM5, mac_data);
-
/* Enable the periodic inband message,
* Request PCIe clock in K1 page770_17[10:9] =01b
*/
@@ -6581,6 +6547,40 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
mac_data &= ~BIT(31);
mac_data |= BIT(0);
ew32(FEXTNVM7, mac_data);
+
+ /* Disable the Dynamic Power Gating in the MAC */
+ mac_data = er32(FEXTNVM7);
+ mac_data &= 0xFFBFFFFF;
+ ew32(FEXTNVM7, mac_data);
+
+ /* Disable mPHY power gating for any link and speed */
+ mac_data = er32(FEXTNVM8);
+ mac_data &= ~BIT(9);
+ ew32(FEXTNVM8, mac_data);
+
+ /* Disable K1 off */
+ mac_data = er32(FEXTNVM6);
+ mac_data &= ~BIT(31);
+ ew32(FEXTNVM6, mac_data);
+
+ /* Disable Ungate PGCB clock */
+ mac_data = er32(FEXTNVM9);
+ mac_data |= BIT(28);
+ ew32(FEXTNVM9, mac_data);
+
+ /* Cancel not waking from dynamic
+ * Power Gating with clock request
+ */
+ mac_data = er32(FEXTNVM12);
+ mac_data &= ~BIT(12);
+ ew32(FEXTNVM12, mac_data);
+
+ /* Revert the lanphypc logic to use the internal Gbe counter
+ * and not the PMC counter
+ */
+ mac_data = er32(FEXTNVM5);
+ mac_data &= 0xFFFFFF7F;
+ ew32(FEXTNVM5, mac_data);
}
static int e1000e_pm_freeze(struct device *dev)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index ee86d2c53079..55b5bb884d73 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -109,10 +109,6 @@ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc)
-EFBIG, /* I40E_AQ_RC_EFBIG */
};
- /* aq_rc is invalid if AQ timed out */
- if (aq_ret == -EIO)
- return -EAGAIN;
-
if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0]))))
return -ERANGE;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 284c3fad5a6e..310513d9321b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -13293,6 +13293,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
bool need_reset;
int i;
+ /* VSI shall be deleted in a moment, block loading new programs */
+ if (prog && test_bit(__I40E_IN_REMOVE, pf->state))
+ return -EINVAL;
+
/* Don't allow frames that span over multiple buffers */
if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
@@ -13301,14 +13305,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
-
if (need_reset)
i40e_prep_for_reset(pf);
- /* VSI shall be deleted in a moment, just return EINVAL */
- if (test_bit(__I40E_IN_REMOVE, pf->state))
- return -EINVAL;
-
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) {
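
The i40e hunk reorders validation and side effects: the __I40E_IN_REMOVE check used to run after i40e_prep_for_reset(), so rejecting the request could leave the device half torn down. A small self-contained sketch of the validate-before-mutate shape, with invented state in place of the PF:

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

struct dev_state { bool in_remove; bool xdp_enabled; bool torn_down; };

static void prep_for_reset(struct dev_state *s) { s->torn_down = true; }

/* Validate first: a request against a device being removed must fail
 * before any destructive preparation step has run.
 */
static int xdp_setup(struct dev_state *s, bool new_prog)
{
	if (new_prog && s->in_remove)
		return -EINVAL;

	if (s->xdp_enabled != new_prog)
		prep_for_reset(s);
	s->xdp_enabled = new_prog;	/* stands in for the xchg() */
	return 0;
}

int main(void)
{
	struct dev_state s = { .in_remove = true };

	assert(xdp_setup(&s, true) == -EINVAL);
	assert(!s.torn_down);	/* the old ordering would have reset here */
	return 0;
}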
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
index ce5034ed2b24..f182179529b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_ddp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
@@ -1339,6 +1339,7 @@ ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start,
for (i = 0; i < count; i++) {
bool last = false;
+ int try_cnt = 0;
int status;
bh = (struct ice_buf_hdr *)(bufs + start + i);
@@ -1346,8 +1347,26 @@ ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start,
if (indicate_last)
last = ice_is_last_download_buffer(bh, i, count);
- status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last,
- &offset, &info, NULL);
+ while (1) {
+ status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE,
+ last, &offset, &info,
+ NULL);
+ if (hw->adminq.sq_last_status != ICE_AQ_RC_ENOSEC &&
+ hw->adminq.sq_last_status != ICE_AQ_RC_EBADSIG)
+ break;
+
+ try_cnt++;
+
+ if (try_cnt == 5)
+ break;
+
+ msleep(20);
+ }
+
+ if (try_cnt)
+ dev_dbg(ice_hw_to_dev(hw),
+ "ice_aq_download_pkg number of retries: %d\n",
+ try_cnt);
/* Save AQ status from download package */
if (status) {
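
The ice_ddp.c change wraps the package download in a bounded retry: only the transient ENOSEC/EBADSIG admin-queue errors are retried, at most five attempts, with a 20 ms pause between them. A standalone sketch of the same loop, with invented return codes and stubbed helpers:

#include <stdio.h>

enum aq_rc { AQ_OK, AQ_ENOSEC, AQ_EBADSIG, AQ_EIO };	/* invented codes */

static enum aq_rc download_buf(int i) { (void)i; return AQ_OK; }	/* stub */
static void sleep_ms(int ms) { (void)ms; }				/* stub */

#define MAX_TRIES 5

static enum aq_rc download_with_retry(int i)
{
	enum aq_rc rc;
	int try_cnt = 0;

	for (;;) {
		rc = download_buf(i);
		/* Only the transient security/signature errors are worth
		 * retrying; anything else (including success) is final.
		 */
		if (rc != AQ_ENOSEC && rc != AQ_EBADSIG)
			break;
		if (++try_cnt == MAX_TRIES)
			break;
		sleep_ms(20);
	}

	if (try_cnt)
		fprintf(stderr, "download retried %d time(s)\n", try_cnt);
	return rc;
}

int main(void)
{
	return download_with_retry(0) == AQ_OK ? 0 : 1;
}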
diff --git a/drivers/net/ethernet/intel/ice/ice_hwmon.c b/drivers/net/ethernet/intel/ice/ice_hwmon.c
index e4c2c1bff6c0..b7aa6812510a 100644
--- a/drivers/net/ethernet/intel/ice/ice_hwmon.c
+++ b/drivers/net/ethernet/intel/ice/ice_hwmon.c
@@ -96,7 +96,7 @@ static bool ice_is_internal_reading_supported(struct ice_pf *pf)
unsigned long sensors = pf->hw.dev_caps.supported_sensors;
- return _test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors);
+ return test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors);
};
void ice_hwmon_init(struct ice_pf *pf)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1b61ca3a6eb6..55a42aad92a5 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -805,6 +805,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
}
switch (vsi->port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_200GB:
+ speed = "200 G";
+ break;
case ICE_AQ_LINK_SPEED_100GB:
speed = "100 G";
break;
@@ -4136,7 +4139,7 @@ bool ice_is_wol_supported(struct ice_hw *hw)
int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
{
struct ice_pf *pf = vsi->back;
- int err = 0, timeout = 50;
+ int i, err = 0, timeout = 50;
if (!new_rx && !new_tx)
return -EINVAL;
@@ -4162,6 +4165,14 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
ice_vsi_close(vsi);
ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+
+ ice_for_each_traffic_class(i) {
+ if (vsi->tc_cfg.ena_tc & BIT(i))
+ netdev_set_tc_queue(vsi->netdev,
+ vsi->tc_cfg.tc_info[i].netdev_tc,
+ vsi->tc_cfg.tc_info[i].qcount_tx,
+ vsi->tc_cfg.tc_info[i].qoffset);
+ }
ice_pf_dcb_recfg(pf, locked);
ice_vsi_open(vsi);
done:
@@ -5564,7 +5575,7 @@ static int ice_suspend(struct device *dev)
*/
disabled = ice_service_task_stop(pf);
- ice_unplug_aux_dev(pf);
+ ice_deinit_rdma(pf);
/* Already suspended?, then there is nothing to do */
if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
@@ -5644,6 +5655,11 @@ static int ice_resume(struct device *dev)
if (ret)
dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
+ ret = ice_init_rdma(pf);
+ if (ret)
+ dev_err(dev, "Reinitialize RDMA during resume failed: %d\n",
+ ret);
+
clear_bit(ICE_DOWN, pf->state);
/* Now perform PF reset and rebuild */
reset_type = ICE_RESET_PFR;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 0f17fc1181d2..fefaf52fd677 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1559,6 +1559,10 @@ void ice_ptp_extts_event(struct ice_pf *pf)
u8 chan, tmr_idx;
u32 hi, lo;
+ /* Don't process timestamp events if PTP is not ready */
+ if (pf->ptp.state != ICE_PTP_READY)
+ return;
+
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
/* Event time is captured by one of the two matched registers
* GLTSYN_EVNT_L: 32 LSB of sampled time event
@@ -1584,27 +1588,33 @@ void ice_ptp_extts_event(struct ice_pf *pf)
/**
* ice_ptp_cfg_extts - Configure EXTTS pin and channel
* @pf: Board private structure
- * @ena: true to enable; false to disable
* @chan: GPIO channel (0-3)
- * @gpio_pin: GPIO pin
- * @extts_flags: request flags from the ptp_extts_request.flags
+ * @config: desired EXTTS configuration.
+ * @store: if true, store the configuration so it can be restored after reset
+ *
+ * Configure an external timestamp event on the requested channel.
+ *
+ * Return: 0 on success, -EOPNOTSUPP on unsupported flags
*/
-static int
-ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
- unsigned int extts_flags)
+static int ice_ptp_cfg_extts(struct ice_pf *pf, unsigned int chan,
+ struct ice_extts_channel *config, bool store)
{
u32 func, aux_reg, gpio_reg, irq_reg;
struct ice_hw *hw = &pf->hw;
u8 tmr_idx;
- if (chan > (unsigned int)pf->ptp.info.n_ext_ts)
- return -EINVAL;
+ /* Reject requests with unsupported flags */
+ if (config->flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
irq_reg = rd32(hw, PFINT_OICR_ENA);
- if (ena) {
+ if (config->ena) {
/* Enable the interrupt */
irq_reg |= PFINT_OICR_TSYN_EVNT_M;
aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M;
@@ -1613,9 +1623,9 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
#define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE BIT(1)
/* set event level to requested edge */
- if (extts_flags & PTP_FALLING_EDGE)
+ if (config->flags & PTP_FALLING_EDGE)
aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE;
- if (extts_flags & PTP_RISING_EDGE)
+ if (config->flags & PTP_RISING_EDGE)
aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE;
/* Write GPIO CTL reg.
@@ -1636,12 +1646,52 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
wr32(hw, PFINT_OICR_ENA, irq_reg);
wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg);
- wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg);
+ wr32(hw, GLGEN_GPIO_CTL(config->gpio_pin), gpio_reg);
+
+ if (store)
+ memcpy(&pf->ptp.extts_channels[chan], config, sizeof(*config));
return 0;
}
/**
+ * ice_ptp_disable_all_extts - Disable all EXTTS channels
+ * @pf: Board private structure
+ */
+static void ice_ptp_disable_all_extts(struct ice_pf *pf)
+{
+ struct ice_extts_channel extts_cfg = {};
+ int i;
+
+ for (i = 0; i < pf->ptp.info.n_ext_ts; i++) {
+ if (pf->ptp.extts_channels[i].ena) {
+ extts_cfg.gpio_pin = pf->ptp.extts_channels[i].gpio_pin;
+ extts_cfg.ena = false;
+ ice_ptp_cfg_extts(pf, i, &extts_cfg, false);
+ }
+ }
+
+ synchronize_irq(pf->oicr_irq.virq);
+}
+
+/**
+ * ice_ptp_enable_all_extts - Enable all EXTTS channels
+ * @pf: Board private structure
+ *
+ * Called during reset to restore user configuration.
+ */
+static void ice_ptp_enable_all_extts(struct ice_pf *pf)
+{
+ int i;
+
+ for (i = 0; i < pf->ptp.info.n_ext_ts; i++) {
+ if (pf->ptp.extts_channels[i].ena)
+ ice_ptp_cfg_extts(pf, i, &pf->ptp.extts_channels[i],
+ false);
+ }
+}
+
+/**
* ice_ptp_cfg_clkout - Configure clock to generate periodic wave
* @pf: Board private structure
* @chan: GPIO channel (0-3)
@@ -1659,6 +1709,9 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan,
u32 func, val, gpio_pin;
u8 tmr_idx;
+ if (config && config->flags & ~PTP_PEROUT_PHASE)
+ return -EOPNOTSUPP;
+
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
/* 0. Reset mode & out_en in AUX_OUT */
@@ -1795,17 +1848,18 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
struct ptp_clock_request *rq, int on)
{
struct ice_pf *pf = ptp_info_to_pf(info);
- struct ice_perout_channel clk_cfg = {0};
bool sma_pres = false;
unsigned int chan;
u32 gpio_pin;
- int err;
if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL))
sma_pres = true;
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
+ {
+ struct ice_perout_channel clk_cfg = {};
+
chan = rq->perout.index;
if (sma_pres) {
if (chan == ice_pin_desc_e810t[SMA1].chan)
@@ -1825,15 +1879,19 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
clk_cfg.gpio_pin = chan;
}
+ clk_cfg.flags = rq->perout.flags;
clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) +
rq->perout.period.nsec);
clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) +
rq->perout.start.nsec);
clk_cfg.ena = !!on;
- err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
- break;
+ return ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
+ }
case PTP_CLK_REQ_EXTTS:
+ {
+ struct ice_extts_channel extts_cfg = {};
+
chan = rq->extts.index;
if (sma_pres) {
if (chan < ice_pin_desc_e810t[SMA2].chan)
@@ -1849,14 +1907,15 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
gpio_pin = chan;
}
- err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin,
- rq->extts.flags);
- break;
+ extts_cfg.flags = rq->extts.flags;
+ extts_cfg.gpio_pin = gpio_pin;
+ extts_cfg.ena = !!on;
+
+ return ice_ptp_cfg_extts(pf, chan, &extts_cfg, true);
+ }
default:
return -EOPNOTSUPP;
}
-
- return err;
}
/**
@@ -1869,26 +1928,32 @@ static int ice_ptp_gpio_enable_e823(struct ptp_clock_info *info,
struct ptp_clock_request *rq, int on)
{
struct ice_pf *pf = ptp_info_to_pf(info);
- struct ice_perout_channel clk_cfg = {0};
- int err;
switch (rq->type) {
case PTP_CLK_REQ_PPS:
+ {
+ struct ice_perout_channel clk_cfg = {};
+
+ clk_cfg.flags = rq->perout.flags;
clk_cfg.gpio_pin = PPS_PIN_INDEX;
clk_cfg.period = NSEC_PER_SEC;
clk_cfg.ena = !!on;
- err = ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true);
- break;
+ return ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true);
+ }
case PTP_CLK_REQ_EXTTS:
- err = ice_ptp_cfg_extts(pf, !!on, rq->extts.index,
- TIME_SYNC_PIN_INDEX, rq->extts.flags);
- break;
+ {
+ struct ice_extts_channel extts_cfg = {};
+
+ extts_cfg.flags = rq->extts.flags;
+ extts_cfg.gpio_pin = TIME_SYNC_PIN_INDEX;
+ extts_cfg.ena = !!on;
+
+ return ice_ptp_cfg_extts(pf, rq->extts.index, &extts_cfg, true);
+ }
default:
return -EOPNOTSUPP;
}
-
- return err;
}
/**
@@ -2720,6 +2785,10 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf)
ice_ptp_restart_all_phy(pf);
}
+ /* Re-enable all periodic outputs and external timestamp events */
+ ice_ptp_enable_all_clkout(pf);
+ ice_ptp_enable_all_extts(pf);
+
return 0;
}
@@ -3275,6 +3344,8 @@ void ice_ptp_release(struct ice_pf *pf)
ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+ ice_ptp_disable_all_extts(pf);
+
kthread_cancel_delayed_work_sync(&pf->ptp.work);
ice_ptp_port_phy_stop(&pf->ptp.port);
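
The EXTTS rework above hinges on remembering user requests: ice_ptp_cfg_extts() optionally stores the channel configuration, teardown disables every stored channel (and synchronizes the IRQ), and the rebuild path replays them after a reset wipes the hardware state. A compact userspace sketch of that save-and-replay pattern, with an invented register write:

#include <stdbool.h>
#include <string.h>

#define N_CHANNELS 4

/* Mirrors the shape of the new ice_extts_channel (illustrative). */
struct extts_cfg {
	bool ena;
	unsigned int gpio_pin;
	unsigned int flags;
};

static struct extts_cfg saved[N_CHANNELS];

static void hw_program_extts(unsigned int chan, const struct extts_cfg *cfg)
{
	/* the driver writes the AUX_IN/GPIO registers here */
	(void)chan; (void)cfg;
}

/* Program the channel; optionally remember the request so it can be
 * replayed after a device reset.
 */
static void cfg_extts(unsigned int chan, const struct extts_cfg *cfg,
		      bool store)
{
	hw_program_extts(chan, cfg);
	if (store)
		memcpy(&saved[chan], cfg, sizeof(*cfg));
}

/* After reset: re-apply every channel the user had enabled. */
static void restore_all_extts(void)
{
	for (unsigned int i = 0; i < N_CHANNELS; i++)
		if (saved[i].ena)
			cfg_extts(i, &saved[i], false);
}

int main(void)
{
	struct extts_cfg req = { .ena = true, .gpio_pin = 1, .flags = 0 };

	cfg_extts(0, &req, true);	/* user request: remembered */
	restore_all_extts();		/* post-reset: replays channel 0 */
	return saved[0].ena ? 0 : 1;
}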
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 3af20025043a..e2af9749061c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -29,10 +29,17 @@ enum ice_ptp_pin_e810t {
struct ice_perout_channel {
bool ena;
u32 gpio_pin;
+ u32 flags;
u64 period;
u64 start_time;
};
+struct ice_extts_channel {
+ bool ena;
+ u32 gpio_pin;
+ u32 flags;
+};
+
/* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp
* is stored in a buffer of registers. Depending on the specific hardware,
* this buffer might be shared across multiple PHY ports.
@@ -226,6 +233,7 @@ enum ice_ptp_state {
* @ext_ts_irq: the external timestamp IRQ in use
* @kworker: kwork thread for handling periodic work
* @perout_channels: periodic output data
+ * @extts_channels: channels for external timestamps
* @info: structure defining PTP hardware capabilities
* @clock: pointer to registered PTP clock device
* @tstamp_config: hardware timestamping configuration
@@ -249,6 +257,7 @@ struct ice_ptp {
u8 ext_ts_irq;
struct kthread_worker *kworker;
struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX];
+ struct ice_extts_channel extts_channels[GLTSYN_TGT_H_IDX_MAX];
struct ptp_clock_info info;
struct ptp_clock *clock;
struct hwtstamp_config tstamp_config;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 94d6670d0901..1191031b2a43 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1899,7 +1899,8 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id,
lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
lkup_type == ICE_SW_LKUP_PROMISC ||
lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
- lkup_type == ICE_SW_LKUP_DFLT) {
+ lkup_type == ICE_SW_LKUP_DFLT ||
+ lkup_type == ICE_SW_LKUP_LAST) {
sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP);
} else if (lkup_type == ICE_SW_LKUP_VLAN) {
if (opc == ice_aqc_opc_alloc_res)
@@ -2922,7 +2923,8 @@ ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
lkup_type == ICE_SW_LKUP_PROMISC ||
lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
- lkup_type == ICE_SW_LKUP_DFLT)
+ lkup_type == ICE_SW_LKUP_DFLT ||
+ lkup_type == ICE_SW_LKUP_LAST)
rule_type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR :
ICE_AQC_SW_RULES_T_VSI_LIST_SET;
else if (lkup_type == ICE_SW_LKUP_VLAN)
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 5352fee62d2b..0b9982804370 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
if (ch->dma.irq)
free_irq(ch->dma.irq, priv);
if (IS_RX(ch->idx)) {
- int desc;
+ struct ltq_dma_channel *dma = &ch->dma;
- for (desc = 0; desc < LTQ_DESC_NUM; desc++)
+ for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++)
dev_kfree_skb_any(ch->skb[ch->dma.desc]);
}
}
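
The lantiq_etop fix is the classic mismatched loop variable: the old loop advanced a local desc while the body freed ch->skb[ch->dma.desc], freeing slot 0 repeatedly and leaking the rest. A tiny self-contained demonstration of the broken and fixed shapes:

#include <assert.h>

#define NUM 4

struct chan { int desc; };

int main(void)
{
	struct chan ch = { 0 };
	int hit[NUM] = { 0 };

	/* Buggy shape: the index the body reads never advances. */
	for (int i = 0; i < NUM; i++)
		hit[ch.desc]++;
	assert(hit[0] == 4 && hit[1] == 0);	/* slot 0 done 4x, rest skipped */

	/* Fixed shape: drive the loop with the field the body reads. */
	for (ch.desc = 0; ch.desc < NUM; ch.desc++)
		hit[ch.desc]++;
	assert(hit[0] == 5 && hit[3] == 1);	/* every slot visited once */

	return 0;
}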
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index e91486c48de3..9adf4301c9b1 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4014,7 +4014,10 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
}
}
- skb = build_skb(data, frag_size);
+ if (frag_size)
+ skb = build_skb(data, frag_size);
+ else
+ skb = slab_build_skb(data);
if (!skb) {
netdev_warn(port->dev, "skb build failed\n");
goto err_drop_frame;
@@ -6904,6 +6907,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
/* 9704 == 9728 - 20 and rounding to 8 */
dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
device_set_node(&dev->dev, port_fwnode);
+ dev->dev_port = port->id;
port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops;
port->pcs_gmac.neg_mode = true;
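
The mvpp2 change matches the skb constructor to the buffer's allocator: build_skb() expects a page fragment with a real frag_size, while frag_size == 0 marks a kmalloc()'ed buffer that must go through slab_build_skb() instead. A kernel-context sketch of the dispatch (compilable only in-tree, not standalone):

#include <linux/skbuff.h>

/* Pick the constructor that matches how 'data' was allocated;
 * frag_size == 0 marks a kmalloc()'ed buffer in this scheme.
 */
static struct sk_buff *build_rx_skb(void *data, unsigned int frag_size)
{
	return frag_size ? build_skb(data, frag_size)
			 : slab_build_skb(data);
}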
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 4a77f6fe2622..05b84581d5c5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1745,7 +1745,7 @@ struct cpt_lf_alloc_req_msg {
u16 nix_pf_func;
u16 sso_pf_func;
u16 eng_grpmsk;
- int blkaddr;
+ u8 blkaddr;
u8 ctx_ilen_valid : 1;
u8 ctx_ilen : 7;
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index d883157393ea..6c3aca6f278d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -63,8 +63,13 @@ enum npc_kpu_lb_ltype {
NPC_LT_LB_CUSTOM1 = 0xF,
};
+/* Don't modify ltypes up to IP6_EXT, otherwise length and checksum of IP
+ * headers may not be checked correctly. IPv4 ltypes and IPv6 ltypes must
+ * differ only at bit 0 so mask 0xE can be used to detect extended headers.
+ */
enum npc_kpu_lc_ltype {
- NPC_LT_LC_IP = 1,
+ NPC_LT_LC_PTP = 1,
+ NPC_LT_LC_IP,
NPC_LT_LC_IP_OPT,
NPC_LT_LC_IP6,
NPC_LT_LC_IP6_EXT,
@@ -72,7 +77,6 @@ enum npc_kpu_lc_ltype {
NPC_LT_LC_RARP,
NPC_LT_LC_MPLS,
NPC_LT_LC_NSH,
- NPC_LT_LC_PTP,
NPC_LT_LC_FCOE,
NPC_LT_LC_NGIO,
NPC_LT_LC_CUSTOM0 = 0xE,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index ff78251f92d4..5f661e67ccbc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1643,7 +1643,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu,
if (req->ssow > block->lf.max) {
dev_err(&rvu->pdev->dev,
"Func 0x%x: Invalid SSOW req, %d > max %d\n",
- pcifunc, req->sso, block->lf.max);
+ pcifunc, req->ssow, block->lf.max);
return -EINVAL;
}
mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index f047185f38e0..3e09d2285814 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -696,7 +696,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu,
struct cpt_rd_wr_reg_msg *req,
struct cpt_rd_wr_reg_msg *rsp)
{
- int blkaddr;
+ u64 offset = req->reg_offset;
+ int blkaddr, lf;
blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
if (blkaddr < 0)
@@ -707,17 +708,25 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu,
!is_cpt_vf(rvu, req->hdr.pcifunc))
return CPT_AF_ERR_ACCESS_DENIED;
- rsp->reg_offset = req->reg_offset;
- rsp->ret_val = req->ret_val;
- rsp->is_write = req->is_write;
-
if (!is_valid_offset(rvu, req))
return CPT_AF_ERR_ACCESS_DENIED;
+ /* Translate local LF used by VFs to global CPT LF */
+ lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc,
+ (offset & 0xFFF) >> 3);
+
+ /* Translate local LF's offset to global CPT LF's offset */
+ offset &= 0xFF000;
+ offset += lf << 3;
+
+ rsp->reg_offset = offset;
+ rsp->ret_val = req->ret_val;
+ rsp->is_write = req->is_write;
+
if (req->is_write)
- rvu_write64(rvu, blkaddr, req->reg_offset, req->val);
+ rvu_write64(rvu, blkaddr, offset, req->val);
else
- rsp->val = rvu_read64(rvu, blkaddr, req->reg_offset);
+ rsp->val = rvu_read64(rvu, blkaddr, offset);
return 0;
}
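
The rvu_cpt.c hunk translates a VF's local LF register offset into the global one before the access: with an 8-byte register stride, bits [11:3] of the offset carry the LF slot, which is swapped for the global LF that rvu_get_lf() returns. A worked standalone example of the bit manipulation, with a made-up slot-to-LF lookup in place of the driver's:

#include <assert.h>
#include <stdint.h>

/* Hypothetical slot->global-LF lookup standing in for rvu_get_lf(). */
static uint64_t lookup_global_lf(uint64_t local_slot)
{
	return local_slot + 8;	/* pretend this VF's LFs start at 8 */
}

static uint64_t translate(uint64_t offset)
{
	/* Register stride is 8 bytes, so bits [11:3] carry the LF slot. */
	uint64_t lf = lookup_global_lf((offset & 0xFFF) >> 3);

	offset &= 0xFF000;	/* keep the register block bits */
	offset += lf << 3;	/* splice in the global LF */
	return offset;
}

int main(void)
{
	/* Local LF 2 of register block 0x10000 -> global LF 10. */
	assert(translate(0x10010) == 0x10050);
	return 0;
}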
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 00af8888e329..3dc828cf6c5a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3864,6 +3864,11 @@ static int get_flowkey_alg_idx(struct nix_hw *nix_hw, u32 flow_cfg)
return -ERANGE;
}
+/* Mask to match ipv6(NPC_LT_LC_IP6) and ipv6 ext(NPC_LT_LC_IP6_EXT) */
+#define NPC_LT_LC_IP6_MATCH_MSK ((~(NPC_LT_LC_IP6 ^ NPC_LT_LC_IP6_EXT)) & 0xf)
+/* Mask to match both ipv4(NPC_LT_LC_IP) and ipv4 ext(NPC_LT_LC_IP_OPT) */
+#define NPC_LT_LC_IP_MATCH_MSK ((~(NPC_LT_LC_IP ^ NPC_LT_LC_IP_OPT)) & 0xf)
+
static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
{
int idx, nr_field, key_off, field_marker, keyoff_marker;
@@ -3933,7 +3938,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
field->hdr_offset = 9; /* offset */
field->bytesm1 = 0; /* 1 byte */
field->ltype_match = NPC_LT_LC_IP;
- field->ltype_mask = 0xF;
+ field->ltype_mask = NPC_LT_LC_IP_MATCH_MSK;
break;
case NIX_FLOW_KEY_TYPE_IPV4:
case NIX_FLOW_KEY_TYPE_INNR_IPV4:
@@ -3960,8 +3965,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
field->bytesm1 = 3; /* DIP, 4 bytes */
}
}
-
- field->ltype_mask = 0xF; /* Match only IPv4 */
+ field->ltype_mask = NPC_LT_LC_IP_MATCH_MSK;
keyoff_marker = false;
break;
case NIX_FLOW_KEY_TYPE_IPV6:
@@ -3990,7 +3994,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
field->bytesm1 = 15; /* DIP,16 bytes */
}
}
- field->ltype_mask = 0xF; /* Match only IPv6 */
+ field->ltype_mask = NPC_LT_LC_IP6_MATCH_MSK;
break;
case NIX_FLOW_KEY_TYPE_TCP:
case NIX_FLOW_KEY_TYPE_UDP:
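A small standalone check (illustrative ltype values, not the real NPC enum) of why ~(A ^ B) & 0xf matches both layer types when they differ in a single bit, which is what the NPC_LT_LC_*_MATCH_MSK macros above rely on.

#include <stdio.h>

/* Hypothetical ltype pair differing in one low bit, like IP and IP_OPT. */
#define LT_IP		0x8
#define LT_IP_OPT	0x9
#define LT_MATCH_MSK	((~(LT_IP ^ LT_IP_OPT)) & 0xf)

int main(void)
{
	for (int lt = 0; lt < 16; lt++)
		if ((lt & LT_MATCH_MSK) == (LT_IP & LT_MATCH_MSK))
			printf("ltype 0x%x matches\n", lt); /* 0x8 and 0x9 only */
	return 0;
}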
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index 5664f768cb0c..64a97a0a10ed 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -9,10 +9,9 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o
rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
otx2_devlink.o qos_sq.o qos.o
-rvu_nicvf-y := otx2_vf.o otx2_devlink.o
+rvu_nicvf-y := otx2_vf.o
rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o
-rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o
rvu_nicpf-$(CONFIG_MACSEC) += cn10k_macsec.o
ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index a85ac039d779..87d5776e3b88 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -648,14 +648,14 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for
} else if (lvl == NIX_TXSCH_LVL_TL4) {
parent = schq_list[NIX_TXSCH_LVL_TL3][prio];
req->reg[0] = NIX_AF_TL4X_PARENT(schq);
- req->regval[0] = parent << 16;
+ req->regval[0] = (u64)parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
req->regval[1] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL3) {
parent = schq_list[NIX_TXSCH_LVL_TL2][prio];
req->reg[0] = NIX_AF_TL3X_PARENT(schq);
- req->regval[0] = parent << 16;
+ req->regval[0] = (u64)parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
req->regval[1] = dwrr_val;
@@ -670,11 +670,11 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for
} else if (lvl == NIX_TXSCH_LVL_TL2) {
parent = schq_list[NIX_TXSCH_LVL_TL1][prio];
req->reg[0] = NIX_AF_TL2X_PARENT(schq);
- req->regval[0] = parent << 16;
+ req->regval[0] = (u64)parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
- req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
+ req->regval[1] = (u64)hw->txschq_aggr_lvl_rr_prio << 24 | dwrr_val;
if (lvl == hw->txschq_link_cfg_lvl) {
req->num_regs++;
@@ -698,7 +698,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for
req->num_regs++;
req->reg[1] = NIX_AF_TL1X_TOPOLOGY(schq);
- req->regval[1] = (TXSCH_TL1_DFLT_RR_PRIO << 1);
+ req->regval[1] = hw->txschq_aggr_lvl_rr_prio << 1;
req->num_regs++;
req->reg[2] = NIX_AF_TL1X_CIR(schq);
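The (u64) casts above (and in otx2_reg.h below) fix a classic promotion bug: the shift is evaluated in 32-bit arithmetic before the assignment widens it. A minimal sketch with a hypothetical queue id:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t parent = 0x12345;		/* hypothetical scheduler queue id */
	uint64_t bad = parent << 16;		/* shifted in 32 bits: top bits lost */
	uint64_t good = (uint64_t)parent << 16;	/* widened first, nothing lost */

	printf("bad  = 0x%llx\n", (unsigned long long)bad);	/* 0x23450000 */
	printf("good = 0x%llx\n", (unsigned long long)good);	/* 0x123450000 */
	return 0;
}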
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
index 28fb643d2917..aa01110f04a3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
@@ -54,6 +54,7 @@ int otx2_pfc_txschq_config(struct otx2_nic *pfvf)
return 0;
}
+EXPORT_SYMBOL(otx2_pfc_txschq_config);
static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio)
{
@@ -122,6 +123,7 @@ int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf)
return 0;
}
+EXPORT_SYMBOL(otx2_pfc_txschq_alloc);
static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio)
{
@@ -260,6 +262,7 @@ update_sq_smq_map:
return 0;
}
+EXPORT_SYMBOL(otx2_pfc_txschq_update);
int otx2_pfc_txschq_stop(struct otx2_nic *pfvf)
{
@@ -282,6 +285,7 @@ int otx2_pfc_txschq_stop(struct otx2_nic *pfvf)
return 0;
}
+EXPORT_SYMBOL(otx2_pfc_txschq_stop);
int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf)
{
@@ -321,6 +325,7 @@ unlock:
mutex_unlock(&pfvf->mbox.lock);
return err;
}
+EXPORT_SYMBOL(otx2_config_priority_flow_ctrl);
void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx,
bool pfc_enable)
@@ -385,6 +390,7 @@ out:
"Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n",
qidx, err);
}
+EXPORT_SYMBOL(otx2_update_bpid_in_rqctx);
static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc)
{
@@ -472,3 +478,4 @@ int otx2_dcbnl_set_ops(struct net_device *dev)
return 0;
}
+EXPORT_SYMBOL(otx2_dcbnl_set_ops);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
index 99ddf31269d9..458d34a62e18 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
@@ -113,6 +113,7 @@ err_dl:
devlink_free(dl);
return err;
}
+EXPORT_SYMBOL(otx2_register_dl);
void otx2_unregister_dl(struct otx2_nic *pfvf)
{
@@ -124,3 +125,4 @@ void otx2_unregister_dl(struct otx2_nic *pfvf)
ARRAY_SIZE(otx2_dl_params));
devlink_free(dl);
}
+EXPORT_SYMBOL(otx2_unregister_dl);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
index 45a32e4b49d1..e3aee6e36215 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
@@ -139,33 +139,34 @@
#define NIX_LF_CINTX_ENA_W1C(a) (NIX_LFBASE | 0xD50 | (a) << 12)
/* NIX AF transmit scheduler registers */
-#define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16)
-#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (a) << 16)
-#define NIX_AF_TL1X_CIR(a) (0xC20 | (a) << 16)
-#define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (a) << 16)
-#define NIX_AF_TL2X_PARENT(a) (0xE88 | (a) << 16)
-#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (a) << 16)
-#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (a) << 16)
-#define NIX_AF_TL2X_CIR(a) (0xE20 | (a) << 16)
-#define NIX_AF_TL2X_PIR(a) (0xE30 | (a) << 16)
-#define NIX_AF_TL3X_PARENT(a) (0x1088 | (a) << 16)
-#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16)
-#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (a) << 16)
-#define NIX_AF_TL3X_CIR(a) (0x1020 | (a) << 16)
-#define NIX_AF_TL3X_PIR(a) (0x1030 | (a) << 16)
-#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (a) << 16)
-#define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16)
-#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16)
-#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (a) << 16)
-#define NIX_AF_TL4X_CIR(a) (0x1220 | (a) << 16)
-#define NIX_AF_TL4X_PIR(a) (0x1230 | (a) << 16)
-#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (a) << 16)
-#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16)
-#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (a) << 16)
-#define NIX_AF_MDQX_CIR(a) (0x1420 | (a) << 16)
-#define NIX_AF_MDQX_PIR(a) (0x1430 | (a) << 16)
-#define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16)
-#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3)
+#define NIX_AF_SMQX_CFG(a) (0x700 | (u64)(a) << 16)
+#define NIX_AF_TL4X_SDP_LINK_CFG(a) (0xB10 | (u64)(a) << 16)
+#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (u64)(a) << 16)
+#define NIX_AF_TL1X_CIR(a) (0xC20 | (u64)(a) << 16)
+#define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (u64)(a) << 16)
+#define NIX_AF_TL2X_PARENT(a) (0xE88 | (u64)(a) << 16)
+#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (u64)(a) << 16)
+#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (u64)(a) << 16)
+#define NIX_AF_TL2X_CIR(a) (0xE20 | (u64)(a) << 16)
+#define NIX_AF_TL2X_PIR(a) (0xE30 | (u64)(a) << 16)
+#define NIX_AF_TL3X_PARENT(a) (0x1088 | (u64)(a) << 16)
+#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (u64)(a) << 16)
+#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (u64)(a) << 16)
+#define NIX_AF_TL3X_CIR(a) (0x1020 | (u64)(a) << 16)
+#define NIX_AF_TL3X_PIR(a) (0x1030 | (u64)(a) << 16)
+#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (u64)(a) << 16)
+#define NIX_AF_TL4X_PARENT(a) (0x1288 | (u64)(a) << 16)
+#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (u64)(a) << 16)
+#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (u64)(a) << 16)
+#define NIX_AF_TL4X_CIR(a) (0x1220 | (u64)(a) << 16)
+#define NIX_AF_TL4X_PIR(a) (0x1230 | (u64)(a) << 16)
+#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (u64)(a) << 16)
+#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (u64)(a) << 16)
+#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (u64)(a) << 16)
+#define NIX_AF_MDQX_CIR(a) (0x1420 | (u64)(a) << 16)
+#define NIX_AF_MDQX_PIR(a) (0x1430 | (u64)(a) << 16)
+#define NIX_AF_MDQX_PARENT(a) (0x1480 | (u64)(a) << 16)
+#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (u64)(a) << 16 | (b) << 3)
/* LMT LF registers */
#define LMT_LFBASE BIT_ULL(RVU_FUNC_BLKADDR_SHIFT)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index a16e9f244117..3eb85949677a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -513,7 +513,7 @@ process_cqe:
static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_poll *cq_poll)
{
- struct dim_sample dim_sample;
+ struct dim_sample dim_sample = { 0 };
u64 rx_frames, rx_bytes;
u64 tx_frames, tx_bytes;
@@ -1174,8 +1174,11 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
/* Insert vlan tag before giving pkt to tso */
- if (skb_vlan_tag_present(skb))
+ if (skb_vlan_tag_present(skb)) {
skb = __vlan_hwaccel_push_inside(skb);
+ if (!skb)
+ return true;
+ }
otx2_sq_append_tso(pfvf, sq, skb, qidx);
return true;
}
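__vlan_hwaccel_push_inside() frees the skb and returns NULL on failure, so the result must be re-checked before use. A userspace stand-in (hypothetical helper, not the kernel API) for the consume-on-failure pattern:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a helper that may consume (free) its argument and
 * return NULL on failure, like __vlan_hwaccel_push_inside().
 */
static char *push_tag(char *buf, int fail)
{
	if (fail) {
		free(buf);
		return NULL;
	}
	return buf;
}

int main(void)
{
	char *pkt = malloc(64);

	pkt = push_tag(pkt, 1);
	if (!pkt) {		/* buffer is already freed: stop, don't touch it */
		puts("consumed");
		return 0;
	}
	free(pkt);
	return 0;
}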
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index edac008099c0..0f844c14485a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -153,7 +153,6 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf,
num_regs++;
otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
-
} else if (level == NIX_TXSCH_LVL_TL4) {
otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
} else if (level == NIX_TXSCH_LVL_TL3) {
@@ -176,7 +175,7 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf,
/* check if node is root */
if (node->qid == OTX2_QOS_QID_INNER && !node->parent) {
cfg->reg[num_regs] = NIX_AF_TL2X_SCHEDULE(node->schq);
- cfg->regval[num_regs] = TXSCH_TL1_DFLT_RR_PRIO << 24 |
+ cfg->regval[num_regs] = (u64)hw->txschq_aggr_lvl_rr_prio << 24 |
mtu_to_dwrr_weight(pfvf,
pfvf->tx_max_pktlen);
num_regs++;
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 31aebeb2e285..25989c79c92e 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1524,6 +1524,7 @@ static int mtk_star_probe(struct platform_device *pdev)
{
struct device_node *of_node;
struct mtk_star_priv *priv;
+ struct phy_device *phydev;
struct net_device *ndev;
struct device *dev;
void __iomem *base;
@@ -1649,6 +1650,12 @@ static int mtk_star_probe(struct platform_device *pdev)
netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll);
netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll);
+ phydev = of_phy_find_device(priv->phy_node);
+ if (phydev) {
+ phydev->mac_managed_pm = true;
+ put_device(&phydev->mdio.dev);
+ }
+
return devm_register_netdev(dev, ndev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index c54fd01ea635..3d274599015b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -989,7 +989,12 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
struct net *net = dev_net(x->xso.dev);
+ u64 trailer_packets = 0, trailer_bytes = 0;
+ u64 replay_packets = 0, replay_bytes = 0;
+ u64 auth_packets = 0, auth_bytes = 0;
+ u64 success_packets, success_bytes;
u64 packets, bytes, lastuse;
+ size_t headers;
lockdep_assert(lockdep_is_held(&x->lock) ||
lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
@@ -999,26 +1004,43 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
return;
if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
- mlx5_fc_query_cached(ipsec_rule->auth.fc, &bytes, &packets, &lastuse);
- x->stats.integrity_failed += packets;
- XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, packets);
-
- mlx5_fc_query_cached(ipsec_rule->trailer.fc, &bytes, &packets, &lastuse);
- XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, packets);
+ mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
+ &auth_packets, &lastuse);
+ x->stats.integrity_failed += auth_packets;
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
+
+ mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
+ &trailer_packets, &lastuse);
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
}
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
return;
- mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
- x->curlft.packets += packets;
- x->curlft.bytes += bytes;
-
if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
- mlx5_fc_query_cached(ipsec_rule->replay.fc, &bytes, &packets, &lastuse);
- x->stats.replay += packets;
- XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, packets);
+ mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
+ &replay_packets, &lastuse);
+ x->stats.replay += replay_packets;
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
}
+
+ mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
+ success_packets = packets - auth_packets - trailer_packets - replay_packets;
+ x->curlft.packets += success_packets;
+ /* The NIC counts all bytes passing through flow steering and cannot
+ * count the payload size alone, which is what SA accounting needs.
+ *
+ * To overcome this HW limitation, approximate the payload size by
+ * subtracting the always-present headers.
+ */
+ headers = sizeof(struct ethhdr);
+ if (sa_entry->attrs.family == AF_INET)
+ headers += sizeof(struct iphdr);
+ else
+ headers += sizeof(struct ipv6hdr);
+
+ success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
+ x->curlft.bytes += success_bytes - headers * success_packets;
}
static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
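Worked numbers (purely hypothetical counter values) for the success-packet/byte arithmetic above, including the header subtraction used to approximate payload bytes:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical cached flow-counter values, mirroring the hunk above. */
	uint64_t packets = 1000, bytes = 1500000;
	uint64_t auth_pkts = 10, auth_bytes = 15000;
	uint64_t trailer_pkts = 5, trailer_bytes = 7500;
	uint64_t replay_pkts = 1, replay_bytes = 1500;
	size_t headers = 14 + 20;	/* sizeof ethhdr + iphdr for an IPv4 SA */

	uint64_t ok_pkts = packets - auth_pkts - trailer_pkts - replay_pkts;
	uint64_t ok_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;

	printf("curlft.packets += %llu\n", (unsigned long long)ok_pkts);
	printf("curlft.bytes   += %llu\n",
	       (unsigned long long)(ok_bytes - headers * ok_pkts));
	return 0;
}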
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a605eae56685..eedbcba22689 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5868,6 +5868,11 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kfree(priv->htb_qos_sq_stats[i]);
kvfree(priv->htb_qos_sq_stats);
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ }
+
memset(priv, 0, sizeof(*priv));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 5693986ae656..ac1565c0c8af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1197,9 +1197,7 @@ static int get_num_eqs(struct mlx5_core_dev *dev)
if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
return 1;
- max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ max_dev_eqs = mlx5_max_eq_cap_get(dev);
num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
max_dev_eqs - MLX5_MAX_ASYNC_EQS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 50d2ea323979..a436ce895e45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -6,6 +6,9 @@
#include "helper.h"
#include "ofld.h"
+static int
+acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
static bool
esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
const struct mlx5_vport *vport)
@@ -123,18 +126,31 @@ static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw,
{
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *flow_rule;
+ bool created = false;
int err = 0;
+ if (!vport->ingress.acl) {
+ err = acl_ingress_ofld_setup(esw, vport);
+ if (err)
+ return err;
+ created = true;
+ }
+
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
flow_act.fg = vport->ingress.offloads.drop_grp;
flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- goto out;
+ goto err_out;
}
vport->ingress.offloads.drop_rule = flow_rule;
-out:
+
+ return 0;
+err_out:
+ /* Only destroy the ingress ACL if it was created in this function. */
+ if (created)
+ esw_acl_ingress_ofld_cleanup(esw, vport);
return err;
}
@@ -299,16 +315,12 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
}
}
-int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+static int
+acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
int num_ftes = 0;
int err;
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
- !esw_acl_ingress_prio_tag_enabled(esw, vport))
- return 0;
-
esw_acl_ingress_allow_rule_destroy(vport);
if (mlx5_eswitch_vport_match_metadata_enabled(esw))
@@ -347,6 +359,15 @@ group_err:
return err;
}
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !esw_acl_ingress_prio_tag_enabled(esw, vport))
+ return 0;
+
+ return acl_ingress_ofld_setup(esw, vport);
+}
+
void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 592143d5e1da..72949cb85244 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -4600,20 +4600,26 @@ mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs,
return -EOPNOTSUPP;
}
+ if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device doesn't support getting the max number of EQs");
+ return -EOPNOTSUPP;
+ }
+
query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
if (!query_ctx)
return -ENOMEM;
mutex_lock(&esw->state_lock);
err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
- MLX5_CAP_GENERAL);
+ MLX5_CAP_GENERAL_2);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
goto out;
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
+ max_eqs = MLX5_GET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b);
if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
*max_io_eqs = 0;
else
@@ -4644,6 +4650,12 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
return -EOPNOTSUPP;
}
+ if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device doesn't support changing the max number of EQs");
+ return -EOPNOTSUPP;
+ }
+
if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) {
NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range");
return -EINVAL;
@@ -4655,17 +4667,17 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
mutex_lock(&esw->state_lock);
err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
- MLX5_CAP_GENERAL);
+ MLX5_CAP_GENERAL_2);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
goto out;
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
+ MLX5_SET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b, max_eqs);
err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
if (err)
NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index c38342b9f320..a7fd18888b6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -383,4 +383,14 @@ static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vpo
: vport;
}
+static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev)
+{
+ if (MLX5_CAP_GEN_2(dev, max_num_eqs_24b))
+ return MLX5_CAP_GEN_2(dev, max_num_eqs_24b);
+
+ if (MLX5_CAP_GEN(dev, max_num_eqs))
+ return MLX5_CAP_GEN(dev, max_num_eqs);
+
+ return 1 << MLX5_CAP_GEN(dev, log_max_eq);
+}
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index fb8787e30d3f..401d39069680 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -711,9 +711,7 @@ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int num_eqs = mlx5_max_eq_cap_get(dev);
int total_vec;
int pcif_vec;
int req_vec;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index 025e0db983fe..b032d5a4b3b8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -1484,6 +1484,7 @@ err_type_file_file_validate:
vfree(types_info->data);
err_data_alloc:
kfree(types_info);
+ linecards->types_info = NULL;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index bf66d996e32e..c0ced4d315f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1594,18 +1594,25 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
return -EBUSY;
}
-static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
+static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci,
+ bool pci_reset_sbr_supported)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mrsr_pl[MLXSW_REG_MRSR_LEN];
int err;
+ if (!pci_reset_sbr_supported) {
+ pci_dbg(pdev, "Performing PCI hot reset instead of \"all reset\"\n");
+ goto sbr;
+ }
+
mlxsw_reg_mrsr_pack(mrsr_pl,
MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
if (err)
return err;
+sbr:
device_lock_assert(&pdev->dev);
pci_cfg_access_lock(pdev);
@@ -1633,6 +1640,7 @@ static int
mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
+ bool pci_reset_sbr_supported = false;
char mcam_pl[MLXSW_REG_MCAM_LEN];
bool pci_reset_supported = false;
u32 sys_status;
@@ -1652,13 +1660,17 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
mlxsw_reg_mcam_pack(mcam_pl,
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
- if (!err)
+ if (!err) {
mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
&pci_reset_supported);
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET_SBR,
+ &pci_reset_sbr_supported);
+ }
if (pci_reset_supported) {
pci_dbg(pdev, "Starting PCI reset flow\n");
- err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
+ err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci,
+ pci_reset_sbr_supported);
} else {
pci_dbg(pdev, "Starting software reset flow\n");
err = mlxsw_pci_reset_sw(mlxsw_pci);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8adf86a6f5cc..3bb89045eaf5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -10671,6 +10671,8 @@ enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
MLXSW_REG_MCAM_MCIA_128B = 34,
/* If set, MRSR.command=6 is supported. */
MLXSW_REG_MCAM_PCI_RESET = 48,
+ /* If set, MRSR.command=6 is supported with Secondary Bus Reset. */
+ MLXSW_REG_MCAM_PCI_RESET_SBR = 67,
};
#define MLXSW_REG_BYTES_PER_DWORD 0x4
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index c9f1c79f3f9d..ba090262e27e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -1607,8 +1607,8 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
unsigned int sb_index)
{
+ u16 local_port, local_port_1, first_local_port, last_local_port;
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
- u16 local_port, local_port_1, last_local_port;
struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx;
u8 masked_count, current_page = 0;
unsigned long cb_priv = 0;
@@ -1628,6 +1628,7 @@ next_batch:
masked_count = 0;
mlxsw_reg_sbsr_pack(sbsr_pl, false);
mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page);
+ first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE;
last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE +
MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1;
@@ -1645,9 +1646,12 @@ next_batch:
if (local_port != MLXSW_PORT_CPU_PORT) {
/* Ingress quotas are not supported for the CPU port */
mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl,
- local_port, 1);
+ local_port - first_local_port,
+ 1);
}
- mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
+ mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl,
+ local_port - first_local_port,
+ 1);
for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
&bulk_list);
@@ -1684,7 +1688,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
unsigned int sb_index)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
- u16 local_port, last_local_port;
+ u16 local_port, first_local_port, last_local_port;
LIST_HEAD(bulk_list);
unsigned int masked_count;
u8 current_page = 0;
@@ -1702,6 +1706,7 @@ next_batch:
masked_count = 0;
mlxsw_reg_sbsr_pack(sbsr_pl, true);
mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page);
+ first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE;
last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE +
MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1;
@@ -1719,9 +1724,12 @@ next_batch:
if (local_port != MLXSW_PORT_CPU_PORT) {
/* Ingress quotas are not supported for the CPU port */
mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl,
- local_port, 1);
+ local_port - first_local_port,
+ 1);
}
- mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
+ mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl,
+ local_port - first_local_port,
+ 1);
for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
&bulk_list);
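The SBSR port masks are per register page, so the bit index must be page-relative rather than the absolute port number. A tiny sketch (PORTS_IN_PAGE is a made-up stand-in value):

#include <stdio.h>

#define PORTS_IN_PAGE 256	/* hypothetical stand-in for MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE */

int main(void)
{
	int local_port = 300;
	int page = local_port / PORTS_IN_PAGE;	/* register page 1 */
	int first = page * PORTS_IN_PAGE;	/* first port on that page: 256 */

	printf("port %d -> page %d, mask bit %d\n",
	       local_port, page, local_port - first);	/* bit 44, not 300 */
	return 0;
}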
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 6453c92f0fa7..7fa1820db9cc 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -352,11 +352,11 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
netif_dbg(ks, intr, ks->netdev,
"%s: txspace %d\n", __func__, tx_space);
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
ks->tx_space = tx_space;
if (netif_queue_stopped(ks->netdev))
netif_wake_queue(ks->netdev);
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
}
if (status & IRQ_SPIBEI) {
@@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev)
ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
ks->queued_len = 0;
+ ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
netif_start_queue(ks->netdev);
netif_dbg(ks, ifup, ks->netdev, "network device up\n");
@@ -635,14 +636,14 @@ static void ks8851_set_rx_mode(struct net_device *dev)
/* schedule work to do the actual set of the data if needed */
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) {
memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl));
schedule_work(&ks->rxctrl_work);
}
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
}
static int ks8851_set_mac_address(struct net_device *dev, void *addr)
@@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev,
int ret;
ks->netdev = netdev;
- ks->tx_space = 6144;
ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
ret = PTR_ERR_OR_ZERO(ks->gpio);
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 670c1de966db..3062cc0f9199 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -340,10 +340,10 @@ static void ks8851_tx_work(struct work_struct *work)
tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR);
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
ks->queued_len -= dequeued_len;
ks->tx_space = tx_space;
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
ks8851_unlock_spi(ks, &flags);
}
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index d0f4ff4ee075..0d1740d64676 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -1127,8 +1127,12 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev,
if (netdev->phydev)
phy_ethtool_get_wol(netdev->phydev, wol);
- wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST |
- WAKE_MAGIC | WAKE_PHY | WAKE_ARP;
+ if (wol->supported != adapter->phy_wol_supported)
+ netif_warn(adapter, drv, adapter->netdev,
+ "PHY changed its supported WOL! old=%x, new=%x\n",
+ adapter->phy_wol_supported, wol->supported);
+
+ wol->supported |= MAC_SUPPORTED_WAKES;
if (adapter->is_pci11x1x)
wol->supported |= WAKE_MAGICSECURE;
@@ -1143,7 +1147,39 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev,
{
struct lan743x_adapter *adapter = netdev_priv(netdev);
+ /* WAKE_MAGICSECURE is a modifier of, and only valid together with,
+ * WAKE_MAGIC
+ */
+ if ((wol->wolopts & WAKE_MAGICSECURE) && !(wol->wolopts & WAKE_MAGIC))
+ return -EINVAL;
+
+ if (netdev->phydev) {
+ struct ethtool_wolinfo phy_wol;
+ int ret;
+
+ phy_wol.wolopts = wol->wolopts & adapter->phy_wol_supported;
+
+ /* If WAKE_MAGICSECURE was requested, filter out WAKE_MAGIC
+ * for PHYs that do not support WAKE_MAGICSECURE
+ */
+ if (wol->wolopts & WAKE_MAGICSECURE &&
+ !(adapter->phy_wol_supported & WAKE_MAGICSECURE))
+ phy_wol.wolopts &= ~WAKE_MAGIC;
+
+ ret = phy_ethtool_set_wol(netdev->phydev, &phy_wol);
+ if (ret && (ret != -EOPNOTSUPP))
+ return ret;
+
+ if (ret == -EOPNOTSUPP)
+ adapter->phy_wolopts = 0;
+ else
+ adapter->phy_wolopts = phy_wol.wolopts;
+ } else {
+ adapter->phy_wolopts = 0;
+ }
+
adapter->wolopts = 0;
+ wol->wolopts &= ~adapter->phy_wolopts;
if (wol->wolopts & WAKE_UCAST)
adapter->wolopts |= WAKE_UCAST;
if (wol->wolopts & WAKE_MCAST)
@@ -1164,10 +1200,10 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev,
memset(adapter->sopass, 0, sizeof(u8) * SOPASS_MAX);
}
+ wol->wolopts = adapter->wolopts | adapter->phy_wolopts;
device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts);
- return netdev->phydev ? phy_ethtool_set_wol(netdev->phydev, wol)
- : -ENETDOWN;
+ return 0;
}
#endif /* CONFIG_PM */
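A compact model (real ethtool WAKE_* bit positions, hypothetical capability sets) of how the set_wol path above splits a request between PHY and MAC so neither side double-counts a wake source:

#include <stdio.h>
#include <stdint.h>

#define WAKE_PHY	(1 << 0)
#define WAKE_MAGIC	(1 << 5)
#define WAKE_MAGICSECURE (1 << 6)

int main(void)
{
	uint32_t requested = WAKE_PHY | WAKE_MAGIC;
	uint32_t phy_supported = WAKE_PHY;	/* hypothetical PHY capabilities */

	uint32_t phy_wolopts = requested & phy_supported; /* PHY takes these */
	uint32_t mac_wolopts = requested & ~phy_wolopts;  /* MAC covers the rest */

	printf("phy=0x%x mac=0x%x\n", phy_wolopts, mac_wolopts);
	return 0;
}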
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 6be8a43c908a..e418539565b1 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3118,6 +3118,17 @@ static int lan743x_netdev_open(struct net_device *netdev)
if (ret)
goto close_tx;
}
+
+#ifdef CONFIG_PM
+ if (adapter->netdev->phydev) {
+ struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+
+ phy_ethtool_get_wol(netdev->phydev, &wol);
+ adapter->phy_wol_supported = wol.supported;
+ adapter->phy_wolopts = wol.wolopts;
+ }
+#endif
+
return 0;
close_tx:
@@ -3575,7 +3586,7 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter)
/* clear wake settings */
pmtctl = lan743x_csr_read(adapter, PMT_CTL);
- pmtctl |= PMT_CTL_WUPS_MASK_;
+ pmtctl |= PMT_CTL_WUPS_MASK_ | PMT_CTL_RES_CLR_WKP_MASK_;
pmtctl &= ~(PMT_CTL_GPIO_WAKEUP_EN_ | PMT_CTL_EEE_WAKEUP_EN_ |
PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_ |
PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ | PMT_CTL_ETH_PHY_WAKE_EN_);
@@ -3587,10 +3598,9 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter)
pmtctl |= PMT_CTL_ETH_PHY_D3_COLD_OVR_ | PMT_CTL_ETH_PHY_D3_OVR_;
- if (adapter->wolopts & WAKE_PHY) {
- pmtctl |= PMT_CTL_ETH_PHY_EDPD_PLL_CTL_;
+ if (adapter->phy_wolopts)
pmtctl |= PMT_CTL_ETH_PHY_WAKE_EN_;
- }
+
if (adapter->wolopts & WAKE_MAGIC) {
wucsr |= MAC_WUCSR_MPEN_;
macrx |= MAC_RX_RXEN_;
@@ -3686,7 +3696,7 @@ static int lan743x_pm_suspend(struct device *dev)
lan743x_csr_write(adapter, MAC_WUCSR2, 0);
lan743x_csr_write(adapter, MAC_WK_SRC, 0xFFFFFFFF);
- if (adapter->wolopts)
+ if (adapter->wolopts || adapter->phy_wolopts)
lan743x_pm_set_wol(adapter);
if (adapter->is_pci11x1x) {
@@ -3710,6 +3720,7 @@ static int lan743x_pm_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
struct lan743x_adapter *adapter = netdev_priv(netdev);
+ u32 data;
int ret;
pci_set_power_state(pdev, PCI_D0);
@@ -3728,6 +3739,30 @@ static int lan743x_pm_resume(struct device *dev)
return ret;
}
+ ret = lan743x_csr_read(adapter, MAC_WK_SRC);
+ netif_dbg(adapter, drv, adapter->netdev,
+ "Wakeup source : 0x%08X\n", ret);
+
+ /* Clear the wol configuration and status bits. Note that
+ * the status bits are "Write One to Clear (W1C)"
+ */
+ data = MAC_WUCSR_EEE_TX_WAKE_ | MAC_WUCSR_EEE_RX_WAKE_ |
+ MAC_WUCSR_RFE_WAKE_FR_ | MAC_WUCSR_PFDA_FR_ | MAC_WUCSR_WUFR_ |
+ MAC_WUCSR_MPR_ | MAC_WUCSR_BCAST_FR_;
+ lan743x_csr_write(adapter, MAC_WUCSR, data);
+
+ data = MAC_WUCSR2_NS_RCD_ | MAC_WUCSR2_ARP_RCD_ |
+ MAC_WUCSR2_IPV6_TCPSYN_RCD_ | MAC_WUCSR2_IPV4_TCPSYN_RCD_;
+ lan743x_csr_write(adapter, MAC_WUCSR2, data);
+
+ data = MAC_WK_SRC_ETH_PHY_WK_ | MAC_WK_SRC_IPV6_TCPSYN_RCD_WK_ |
+ MAC_WK_SRC_IPV4_TCPSYN_RCD_WK_ | MAC_WK_SRC_EEE_TX_WK_ |
+ MAC_WK_SRC_EEE_RX_WK_ | MAC_WK_SRC_RFE_FR_WK_ |
+ MAC_WK_SRC_PFDA_FR_WK_ | MAC_WK_SRC_MP_FR_WK_ |
+ MAC_WK_SRC_BCAST_FR_WK_ | MAC_WK_SRC_WU_FR_WK_ |
+ MAC_WK_SRC_WK_FR_SAVED_;
+ lan743x_csr_write(adapter, MAC_WK_SRC, data);
+
/* open the netdev if it was in the running state when we suspended.
* For instance, this is true when the system wakes up after pm-suspend,
* but it is false when the system wakes up after suspend via the GUI menu
@@ -3736,9 +3771,6 @@ static int lan743x_pm_resume(struct device *dev)
lan743x_netdev_open(netdev);
netif_device_attach(netdev);
- ret = lan743x_csr_read(adapter, MAC_WK_SRC);
- netif_info(adapter, drv, adapter->netdev,
- "Wakeup source : 0x%08X\n", ret);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 645bc048e52e..3b2585a384e2 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -61,6 +61,7 @@
#define PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ BIT(18)
#define PMT_CTL_GPIO_WAKEUP_EN_ BIT(15)
#define PMT_CTL_EEE_WAKEUP_EN_ BIT(13)
+#define PMT_CTL_RES_CLR_WKP_MASK_ GENMASK(9, 8)
#define PMT_CTL_READY_ BIT(7)
#define PMT_CTL_ETH_PHY_RST_ BIT(4)
#define PMT_CTL_WOL_EN_ BIT(3)
@@ -227,12 +228,31 @@
#define MAC_WUCSR (0x140)
#define MAC_MP_SO_EN_ BIT(21)
#define MAC_WUCSR_RFE_WAKE_EN_ BIT(14)
+#define MAC_WUCSR_EEE_TX_WAKE_ BIT(13)
+#define MAC_WUCSR_EEE_RX_WAKE_ BIT(11)
+#define MAC_WUCSR_RFE_WAKE_FR_ BIT(9)
+#define MAC_WUCSR_PFDA_FR_ BIT(7)
+#define MAC_WUCSR_WUFR_ BIT(6)
+#define MAC_WUCSR_MPR_ BIT(5)
+#define MAC_WUCSR_BCAST_FR_ BIT(4)
#define MAC_WUCSR_PFDA_EN_ BIT(3)
#define MAC_WUCSR_WAKE_EN_ BIT(2)
#define MAC_WUCSR_MPEN_ BIT(1)
#define MAC_WUCSR_BCST_EN_ BIT(0)
#define MAC_WK_SRC (0x144)
+#define MAC_WK_SRC_ETH_PHY_WK_ BIT(17)
+#define MAC_WK_SRC_IPV6_TCPSYN_RCD_WK_ BIT(16)
+#define MAC_WK_SRC_IPV4_TCPSYN_RCD_WK_ BIT(15)
+#define MAC_WK_SRC_EEE_TX_WK_ BIT(14)
+#define MAC_WK_SRC_EEE_RX_WK_ BIT(13)
+#define MAC_WK_SRC_RFE_FR_WK_ BIT(12)
+#define MAC_WK_SRC_PFDA_FR_WK_ BIT(11)
+#define MAC_WK_SRC_MP_FR_WK_ BIT(10)
+#define MAC_WK_SRC_BCAST_FR_WK_ BIT(9)
+#define MAC_WK_SRC_WU_FR_WK_ BIT(8)
+#define MAC_WK_SRC_WK_FR_SAVED_ BIT(7)
+
#define MAC_MP_SO_HI (0x148)
#define MAC_MP_SO_LO (0x14C)
@@ -295,6 +315,10 @@
#define RFE_INDX(index) (0x580 + (index << 2))
#define MAC_WUCSR2 (0x600)
+#define MAC_WUCSR2_NS_RCD_ BIT(7)
+#define MAC_WUCSR2_ARP_RCD_ BIT(6)
+#define MAC_WUCSR2_IPV6_TCPSYN_RCD_ BIT(5)
+#define MAC_WUCSR2_IPV4_TCPSYN_RCD_ BIT(4)
#define SGMII_ACC (0x720)
#define SGMII_ACC_SGMII_BZY_ BIT(31)
@@ -1018,6 +1042,8 @@ enum lan743x_sgmii_lsd {
LINK_2500_SLAVE
};
+#define MAC_SUPPORTED_WAKES (WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | \
+ WAKE_MAGIC | WAKE_ARP)
struct lan743x_adapter {
struct net_device *netdev;
struct mii_bus *mdiobus;
@@ -1025,6 +1051,8 @@ struct lan743x_adapter {
#ifdef CONFIG_PM
u32 wolopts;
u8 sopass[SOPASS_MAX];
+ u32 phy_wolopts;
+ u32 phy_wol_supported;
#endif
struct pci_dev *pdev;
struct lan743x_csr csr;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index d087cf954f75..608ad31a9702 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2798,6 +2798,8 @@ static int add_adev(struct gdma_dev *gd)
if (ret)
goto init_fail;
+ /* madev is owned by the auxiliary device */
+ madev = NULL;
ret = auxiliary_device_add(adev);
if (ret)
goto add_fail;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index f30eee4a5a80..b6c01a88098d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -375,7 +375,9 @@ typedef void (*ionic_cq_done_cb)(void *done_arg);
unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
ionic_cq_cb cb, ionic_cq_done_cb done_cb,
void *done_arg);
-unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do);
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq,
+ unsigned int work_to_do,
+ bool in_napi);
int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
struct ionic_queue *q, unsigned int index, const char *name,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1934e9d6d9e4..1837a30ba08a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1189,7 +1189,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
ionic_rx_service, NULL, NULL);
if (lif->hwstamp_txq)
- tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget);
+ tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget, !!budget);
work_done = max(max(n_work, a_work), max(rx_work, tx_work));
if (work_done < budget && napi_complete_done(napi, work_done)) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 2427610f4306..9fdd7cd3ef19 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -23,7 +23,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_tx_desc_info *desc_info,
- struct ionic_txq_comp *comp);
+ struct ionic_txq_comp *comp,
+ bool in_napi);
static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell)
{
@@ -480,6 +481,20 @@ int ionic_xdp_xmit(struct net_device *netdev, int n,
return nxmit;
}
+static void ionic_xdp_rx_put_bufs(struct ionic_queue *q,
+ struct ionic_buf_info *buf_info,
+ int nbufs)
+{
+ int i;
+
+ for (i = 0; i < nbufs; i++) {
+ dma_unmap_page(q->dev, buf_info->dma_addr,
+ IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+ buf_info->page = NULL;
+ buf_info++;
+ }
+}
+
static bool ionic_run_xdp(struct ionic_rx_stats *stats,
struct net_device *netdev,
struct bpf_prog *xdp_prog,
@@ -493,6 +508,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats,
struct netdev_queue *nq;
struct xdp_frame *xdpf;
int remain_len;
+ int nbufs = 1;
int frag_len;
int err = 0;
@@ -542,6 +558,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats,
if (page_is_pfmemalloc(bi->page))
xdp_buff_set_frag_pfmemalloc(&xdp_buf);
} while (remain_len > 0);
+ nbufs += sinfo->nr_frags;
}
xdp_action = bpf_prog_run_xdp(xdp_prog, &xdp_buf);
@@ -574,9 +591,6 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats,
goto out_xdp_abort;
}
- dma_unmap_page(rxq->dev, buf_info->dma_addr,
- IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
-
err = ionic_xdp_post_frame(txq, xdpf, XDP_TX,
buf_info->page,
buf_info->page_offset,
@@ -586,23 +600,19 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats,
netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err);
goto out_xdp_abort;
}
- buf_info->page = NULL;
+ ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs);
stats->xdp_tx++;
/* the Tx completion will free the buffers */
break;
case XDP_REDIRECT:
- /* unmap the pages before handing them to a different device */
- dma_unmap_page(rxq->dev, buf_info->dma_addr,
- IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
-
err = xdp_do_redirect(netdev, &xdp_buf, xdp_prog);
if (err) {
netdev_dbg(netdev, "xdp_do_redirect err %d\n", err);
goto out_xdp_abort;
}
- buf_info->page = NULL;
+ ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs);
rxq->xdp_flush = true;
stats->xdp_redirect++;
break;
@@ -935,7 +945,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
u32 work_done = 0;
u32 flags = 0;
- work_done = ionic_tx_cq_service(cq, budget);
+ work_done = ionic_tx_cq_service(cq, budget, !!budget);
if (unlikely(!budget))
return budget;
@@ -1019,7 +1029,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
txqcq = lif->txqcqs[qi];
txcq = &lif->txqcqs[qi]->cq;
- tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT);
+ tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, !!budget);
if (unlikely(!budget))
return budget;
@@ -1152,7 +1162,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_tx_desc_info *desc_info,
- struct ionic_txq_comp *comp)
+ struct ionic_txq_comp *comp,
+ bool in_napi)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct ionic_qcq *qcq = q_to_qcq(q);
@@ -1204,11 +1215,13 @@ static void ionic_tx_clean(struct ionic_queue *q,
desc_info->bytes = skb->len;
stats->clean++;
- napi_consume_skb(skb, 1);
+ napi_consume_skb(skb, likely(in_napi) ? 1 : 0);
}
static bool ionic_tx_service(struct ionic_cq *cq,
- unsigned int *total_pkts, unsigned int *total_bytes)
+ unsigned int *total_pkts,
+ unsigned int *total_bytes,
+ bool in_napi)
{
struct ionic_tx_desc_info *desc_info;
struct ionic_queue *q = cq->bound_q;
@@ -1230,7 +1243,7 @@ static bool ionic_tx_service(struct ionic_cq *cq,
desc_info->bytes = 0;
index = q->tail_idx;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, comp);
+ ionic_tx_clean(q, desc_info, comp, in_napi);
if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
@@ -1244,7 +1257,9 @@ static bool ionic_tx_service(struct ionic_cq *cq,
return true;
}
-unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq,
+ unsigned int work_to_do,
+ bool in_napi)
{
unsigned int work_done = 0;
unsigned int bytes = 0;
@@ -1253,7 +1268,7 @@ unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
if (work_to_do == 0)
return 0;
- while (ionic_tx_service(cq, &pkts, &bytes)) {
+ while (ionic_tx_service(cq, &pkts, &bytes, in_napi)) {
if (cq->tail_idx == cq->num_descs - 1)
cq->done_color = !cq->done_color;
cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
@@ -1279,7 +1294,7 @@ void ionic_tx_flush(struct ionic_cq *cq)
{
u32 work_done;
- work_done = ionic_tx_cq_service(cq, cq->num_descs);
+ work_done = ionic_tx_cq_service(cq, cq->num_descs, false);
if (work_done)
ionic_intr_credits(cq->idev->intr_ctrl, cq->bound_intr->index,
work_done, IONIC_INTR_CRED_RESET_COALESCE);
@@ -1296,7 +1311,7 @@ void ionic_tx_empty(struct ionic_queue *q)
desc_info = &q->tx_info[q->tail_idx];
desc_info->bytes = 0;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, NULL);
+ ionic_tx_clean(q, desc_info, NULL, false);
if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
index ff3b89e9028e..ad06da0fdaa0 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -98,10 +98,8 @@ qcaspi_info_show(struct seq_file *s, void *what)
seq_printf(s, "IRQ : %d\n",
qca->spi_dev->irq);
- seq_printf(s, "INTR REQ : %u\n",
- qca->intr_req);
- seq_printf(s, "INTR SVC : %u\n",
- qca->intr_svc);
+ seq_printf(s, "INTR : %lx\n",
+ qca->intr);
seq_printf(s, "SPI max speed : %lu\n",
(unsigned long)qca->spi_dev->max_speed_hz);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 5799ecc88a87..8f7ce6b51a1c 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -35,6 +35,8 @@
#define MAX_DMA_BURST_LEN 5000
+#define SPI_INTR 0
+
/* Modules parameters */
#define QCASPI_CLK_SPEED_MIN 1000000
#define QCASPI_CLK_SPEED_MAX 16000000
@@ -579,14 +581,14 @@ qcaspi_spi_thread(void *data)
continue;
}
- if ((qca->intr_req == qca->intr_svc) &&
+ if (!test_bit(SPI_INTR, &qca->intr) &&
!qca->txr.skb[qca->txr.head])
schedule();
set_current_state(TASK_RUNNING);
- netdev_dbg(qca->net_dev, "have work to do. int: %d, tx_skb: %p\n",
- qca->intr_req - qca->intr_svc,
+ netdev_dbg(qca->net_dev, "have work to do. int: %lu, tx_skb: %p\n",
+ qca->intr,
qca->txr.skb[qca->txr.head]);
qcaspi_qca7k_sync(qca, QCASPI_EVENT_UPDATE);
@@ -600,8 +602,7 @@ qcaspi_spi_thread(void *data)
msleep(QCASPI_QCA7K_REBOOT_TIME_MS);
}
- if (qca->intr_svc != qca->intr_req) {
- qca->intr_svc = qca->intr_req;
+ if (test_and_clear_bit(SPI_INTR, &qca->intr)) {
start_spi_intr_handling(qca, &intr_cause);
if (intr_cause & SPI_INT_CPU_ON) {
@@ -663,7 +664,7 @@ qcaspi_intr_handler(int irq, void *data)
{
struct qcaspi *qca = data;
- qca->intr_req++;
+ set_bit(SPI_INTR, &qca->intr);
if (qca->spi_thread)
wake_up_process(qca->spi_thread);
@@ -679,8 +680,7 @@ qcaspi_netdev_open(struct net_device *dev)
if (!qca)
return -EINVAL;
- qca->intr_req = 1;
- qca->intr_svc = 0;
+ set_bit(SPI_INTR, &qca->intr);
qca->sync = QCASPI_SYNC_UNKNOWN;
qcafrm_fsm_init_spi(&qca->frm_handle);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h
index d59cb2352cee..8f4808695e82 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -81,8 +81,7 @@ struct qcaspi {
struct qcafrm_handle frm_handle;
struct sk_buff *rx_skb;
- unsigned int intr_req;
- unsigned int intr_svc;
+ unsigned long intr;
u16 reset_count;
#ifdef CONFIG_DEBUG_FS
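A userspace analogue (C11 atomics standing in for set_bit()/test_and_clear_bit()) of the new SPI_INTR flag: repeated interrupts before the thread runs coalesce into one service pass and nothing is lost, unlike the old req/svc counters.

#include <stdio.h>
#include <stdatomic.h>

#define SPI_INTR 0

static atomic_ulong intr;

static void irq_handler(void)	/* like qcaspi_intr_handler() */
{
	atomic_fetch_or(&intr, 1UL << SPI_INTR);
}

static int test_and_clear_intr(void)	/* like test_and_clear_bit() */
{
	unsigned long old = atomic_fetch_and(&intr, ~(1UL << SPI_INTR));

	return !!(old & (1UL << SPI_INTR));
}

int main(void)
{
	irq_handler();
	irq_handler();	/* a second IRQ before the thread runs coalesces */
	printf("%d\n", test_and_clear_intr());	/* 1: one pass serves both */
	printf("%d\n", test_and_clear_intr());	/* 0: nothing pending */
	return 0;
}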
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index dcab638c57fe..24c90d8f5a44 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -871,13 +871,13 @@ static void rswitch_tx_free(struct net_device *ndev)
dma_rmb();
skb = gq->skbs[gq->dirty];
if (skb) {
+ rdev->ndev->stats.tx_packets++;
+ rdev->ndev->stats.tx_bytes += skb->len;
dma_unmap_single(ndev->dev.parent,
gq->unmap_addrs[gq->dirty],
skb->len, DMA_TO_DEVICE);
dev_kfree_skb_any(gq->skbs[gq->dirty]);
gq->skbs[gq->dirty] = NULL;
- rdev->ndev->stats.tx_packets++;
- rdev->ndev->stats.tx_bytes += skb->len;
}
desc->desc.die_dt = DT_EEMPTY;
}
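The stat updates above move before the unmap/free because skb->len must not be read once the skb is released. A trivial userspace illustration of the ordering (hypothetical buffer, not an skb):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char *skb = calloc(1, 64);
	size_t len = 64;		/* stands in for skb->len */

	/* Capture everything needed from the buffer first... */
	size_t tx_bytes = len;

	free(skb);			/* like dev_kfree_skb_any() */
	/* ...because reading skb->len after the free would be a
	 * use-after-free, which the reordered hunk avoids.
	 */
	printf("tx_bytes=%zu\n", tx_bytes);
	return 0;
}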
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 65d7370b47d5..466c4002f00d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -272,7 +272,7 @@ static const struct ethqos_emac_por emac_v4_0_0_por[] = {
static const struct ethqos_emac_driver_data emac_v4_0_0_data = {
.por = emac_v4_0_0_por,
- .num_por = ARRAY_SIZE(emac_v3_0_0_por),
+ .num_por = ARRAY_SIZE(emac_v4_0_0_por),
.rgmii_config_loopback_en = false,
.has_emac_ge_3 = true,
.link_clk_name = "phyaux",
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index f05bd757dfe5..5ef52ef2698f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -218,6 +218,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
{
u32 num_snapshot, ts_status, tsync_int;
struct ptp_clock_event event;
+ u32 acr_value, channel;
unsigned long flags;
u64 ptp_time;
int i;
@@ -243,12 +244,15 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >>
GMAC_TIMESTAMP_ATSNS_SHIFT;
+ acr_value = readl(priv->ptpaddr + PTP_ACR);
+ channel = ilog2(FIELD_GET(PTP_ACR_MASK, acr_value));
+
for (i = 0; i < num_snapshot; i++) {
read_lock_irqsave(&priv->ptp_lock, flags);
get_ptptime(priv->ptpaddr, &ptp_time);
read_unlock_irqrestore(&priv->ptp_lock, flags);
event.type = PTP_CLOCK_EXTTS;
- event.index = 0;
+ event.index = channel;
event.timestamp = ptp_time;
ptp_clock_event(priv->ptp_clock, &event);
}
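A standalone sketch (made-up register layout; the real PTP_ACR_MASK field may differ) of deriving the auxiliary-snapshot channel via FIELD_GET() plus ilog2(), as the hunk above does:

#include <stdio.h>

#define PTP_ACR_MASK 0xf0	/* hypothetical: enable bits for channels 0-3 */

int main(void)
{
	unsigned int acr = 0x40;	/* hypothetical: only channel 2 enabled */
	unsigned int field = (acr & PTP_ACR_MASK) >> 4;	/* FIELD_GET() -> 0x4 */
	unsigned int channel = 0;

	while (field >>= 1)	/* ilog2(): index of the highest set bit */
		channel++;
	printf("event.index = %u\n", channel);	/* 2 */
	return 0;
}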
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b3afc7cb7d72..c58782c41417 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7662,9 +7662,10 @@ int stmmac_dvr_probe(struct device *device,
#ifdef STMMAC_VLAN_TAG_USED
/* Both mac100 and gmac support receive VLAN tag detection */
ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX;
- ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
- priv->hw->hw_vlan_en = true;
-
+ if (priv->plat->has_gmac4) {
+ ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ priv->hw->hw_vlan_en = true;
+ }
if (priv->dma_cap.vlhash) {
ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 1562fbdd0a04..996f2bcd07a2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -358,24 +358,28 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope;
- /* Port Transmit Rate and Speed Divider */
- switch (div_s64(port_transmit_rate_kbps, 1000)) {
- case SPEED_10000:
- case SPEED_5000:
- ptr = 32;
- break;
- case SPEED_2500:
- case SPEED_1000:
- ptr = 8;
- break;
- case SPEED_100:
- ptr = 4;
- break;
- default:
- netdev_err(priv->dev,
- "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n",
- port_transmit_rate_kbps);
- return -EINVAL;
+ if (qopt->enable) {
+ /* Port Transmit Rate and Speed Divider */
+ switch (div_s64(port_transmit_rate_kbps, 1000)) {
+ case SPEED_10000:
+ case SPEED_5000:
+ ptr = 32;
+ break;
+ case SPEED_2500:
+ case SPEED_1000:
+ ptr = 8;
+ break;
+ case SPEED_100:
+ ptr = 4;
+ break;
+ default:
+ netdev_err(priv->dev,
+ "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n",
+ port_transmit_rate_kbps);
+ return -EINVAL;
+ }
+ } else {
+ ptr = 0;
}
mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
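The divider lookup only makes sense while CBS is being enabled; on disable the slopes are zero, so the old code always hit the error path. A minimal extraction of the mapping (same speeds as the hunk above):

#include <stdio.h>

/* Map portTransmitRate (kbps) to the CBS speed divider. */
static int speed_divider(long long rate_kbps)
{
	switch (rate_kbps / 1000) {
	case 10000:
	case 5000:
		return 32;
	case 2500:
	case 1000:
		return 8;
	case 100:
		return 4;
	default:
		return -1;	/* invalid idleSlope - sendSlope combination */
	}
}

int main(void)
{
	printf("1G      -> %d\n", speed_divider(1000000));	/* 8 */
	printf("disable -> %d\n", speed_divider(0));	/* -1: why disable must skip the check */
	return 0;
}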
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 7c4b6881a93f..d1b682ce9c6d 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1959,6 +1959,7 @@ int wx_sw_init(struct wx *wx)
}
bitmap_zero(wx->state, WX_STATE_NBITS);
+ wx->misc_irq_domain = false;
return 0;
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 68bde91b67a0..81bedc8ee8d4 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1686,6 +1686,7 @@ static int wx_set_interrupt_capability(struct wx *wx)
}
pdev->irq = pci_irq_vector(pdev, 0);
+ wx->num_q_vectors = 1;
return 0;
}
@@ -1996,7 +1997,8 @@ void wx_free_irq(struct wx *wx)
int vector;
if (!(pdev->msix_enabled)) {
- free_irq(pdev->irq, wx);
+ if (!wx->misc_irq_domain)
+ free_irq(pdev->irq, wx);
return;
}
@@ -2011,7 +2013,7 @@ void wx_free_irq(struct wx *wx)
free_irq(entry->vector, q_vector);
}
- if (wx->mac.type == wx_mac_em)
+ if (!wx->misc_irq_domain)
free_irq(wx->msix_entry->vector, wx);
}
EXPORT_SYMBOL(wx_free_irq);
@@ -2026,6 +2028,9 @@ int wx_setup_isb_resources(struct wx *wx)
{
struct pci_dev *pdev = wx->pdev;
+ if (wx->isb_mem)
+ return 0;
+
wx->isb_mem = dma_alloc_coherent(&pdev->dev,
sizeof(u32) * 4,
&wx->isb_dma,
@@ -2385,7 +2390,6 @@ static void wx_free_all_tx_resources(struct wx *wx)
void wx_free_resources(struct wx *wx)
{
- wx_free_isb_resources(wx);
wx_free_all_rx_resources(wx);
wx_free_all_tx_resources(wx);
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 5aaf7b1fa2db..0df7f5712b6f 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1058,6 +1058,7 @@ struct wx {
dma_addr_t isb_dma;
u32 *isb_mem;
u32 isb_tag[WX_ISB_MAX];
+ bool misc_irq_domain;
#define WX_MAX_RETA_ENTRIES 128
#define WX_RSS_INDIR_TBL_MAX 64
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index e894e01d030d..af30ca0312b8 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -387,6 +387,7 @@ err_dis_phy:
err_free_irq:
wx_free_irq(wx);
err_free_resources:
+ wx_free_isb_resources(wx);
wx_free_resources(wx);
return err;
}
@@ -408,6 +409,7 @@ static int ngbe_close(struct net_device *netdev)
ngbe_down(wx);
wx_free_irq(wx);
+ wx_free_isb_resources(wx);
wx_free_resources(wx);
phylink_disconnect_phy(wx->phylink);
wx_control_hw(wx, false);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
index b3e3605d1edb..a4cf682dca65 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -27,57 +27,19 @@ void txgbe_irq_enable(struct wx *wx, bool queues)
}
/**
- * txgbe_intr - msi/legacy mode Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a network interface device structure
- **/
-static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
-{
- struct wx_q_vector *q_vector;
- struct wx *wx = data;
- struct pci_dev *pdev;
- u32 eicr;
-
- q_vector = wx->q_vector[0];
- pdev = wx->pdev;
-
- eicr = wx_misc_isb(wx, WX_ISB_VEC0);
- if (!eicr) {
- /* shared interrupt alert!
- * the interrupt that we masked before the ICR read.
- */
- if (netif_running(wx->netdev))
- txgbe_irq_enable(wx, true);
- return IRQ_NONE; /* Not our interrupt */
- }
- wx->isb_mem[WX_ISB_VEC0] = 0;
- if (!(pdev->msi_enabled))
- wr32(wx, WX_PX_INTA, 1);
-
- wx->isb_mem[WX_ISB_MISC] = 0;
- /* would disable interrupts here but it is auto disabled */
- napi_schedule_irqoff(&q_vector->napi);
-
- /* re-enable link(maybe) and non-queue interrupts, no flush.
- * txgbe_poll will re-enable the queue interrupts
- */
- if (netif_running(wx->netdev))
- txgbe_irq_enable(wx, false);
-
- return IRQ_HANDLED;
-}
-
-/**
- * txgbe_request_msix_irqs - Initialize MSI-X interrupts
+ * txgbe_request_queue_irqs - Initialize MSI-X queue interrupts
* @wx: board private structure
*
- * Allocate MSI-X vectors and request interrupts from the kernel.
+ * Allocate MSI-X queue vectors and request interrupts from the kernel.
**/
-static int txgbe_request_msix_irqs(struct wx *wx)
+int txgbe_request_queue_irqs(struct wx *wx)
{
struct net_device *netdev = wx->netdev;
int vector, err;
+ if (!wx->pdev->msix_enabled)
+ return 0;
+
for (vector = 0; vector < wx->num_q_vectors; vector++) {
struct wx_q_vector *q_vector = wx->q_vector[vector];
struct msix_entry *entry = &wx->msix_q_entries[vector];
@@ -110,34 +72,6 @@ free_queue_irqs:
return err;
}
-/**
- * txgbe_request_irq - initialize interrupts
- * @wx: board private structure
- *
- * Attempt to configure interrupts using the best available
- * capabilities of the hardware and kernel.
- **/
-int txgbe_request_irq(struct wx *wx)
-{
- struct net_device *netdev = wx->netdev;
- struct pci_dev *pdev = wx->pdev;
- int err;
-
- if (pdev->msix_enabled)
- err = txgbe_request_msix_irqs(wx);
- else if (pdev->msi_enabled)
- err = request_irq(wx->pdev->irq, &txgbe_intr, 0,
- netdev->name, wx);
- else
- err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED,
- netdev->name, wx);
-
- if (err)
- wx_err(wx, "request_irq failed, Error %d\n", err);
-
- return err;
-}
-
static int txgbe_request_gpio_irq(struct txgbe *txgbe)
{
txgbe->gpio_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_GPIO);
@@ -178,6 +112,36 @@ static const struct irq_domain_ops txgbe_misc_irq_domain_ops = {
static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
{
+ struct wx_q_vector *q_vector;
+ struct txgbe *txgbe = data;
+ struct wx *wx = txgbe->wx;
+ u32 eicr;
+
+ if (wx->pdev->msix_enabled)
+ return IRQ_WAKE_THREAD;
+
+ eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+ if (!eicr) {
+ /* shared interrupt alert!
+ * re-enable the interrupt that was masked before the ICR read.
+ */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, true);
+ return IRQ_NONE; /* Not our interrupt */
+ }
+ wx->isb_mem[WX_ISB_VEC0] = 0;
+ if (!(wx->pdev->msi_enabled))
+ wr32(wx, WX_PX_INTA, 1);
+
+ /* interrupts would be disabled here, but they are auto-disabled */
+ q_vector = wx->q_vector[0];
+ napi_schedule_irqoff(&q_vector->napi);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
+{
struct txgbe *txgbe = data;
struct wx *wx = txgbe->wx;
unsigned int nhandled = 0;
@@ -223,6 +187,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe)
int txgbe_setup_misc_irq(struct txgbe *txgbe)
{
+ unsigned long flags = IRQF_ONESHOT;
struct wx *wx = txgbe->wx;
int hwirq, err;
@@ -236,14 +201,17 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe)
irq_create_mapping(txgbe->misc.domain, hwirq);
txgbe->misc.chip = txgbe_irq_chip;
- if (wx->pdev->msix_enabled)
+ if (wx->pdev->msix_enabled) {
txgbe->misc.irq = wx->msix_entry->vector;
- else
+ } else {
txgbe->misc.irq = wx->pdev->irq;
+ if (!wx->pdev->msi_enabled)
+ flags |= IRQF_SHARED;
+ }
- err = request_threaded_irq(txgbe->misc.irq, NULL,
- txgbe_misc_irq_handle,
- IRQF_ONESHOT,
+ err = request_threaded_irq(txgbe->misc.irq, txgbe_misc_irq_handle,
+ txgbe_misc_irq_thread_fn,
+ flags,
wx->netdev->name, txgbe);
if (err)
goto del_misc_irq;
@@ -256,6 +224,8 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe)
if (err)
goto free_gpio_irq;
+ wx->misc_irq_domain = true;
+
return 0;
free_gpio_irq:
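A minimal userspace sketch of the dispatch split this rework creates: the hard
handler does only the work that cannot wait (and only in the non-MSI-X case),
then returns a wake-thread indication so the GPIO/link sources are walked in
thread context. All names, types, and return codes below are invented for
illustration, not kernel definitions.

#include <stdbool.h>
#include <stdio.h>

enum ret { NONE, WAKE_THREAD };

static bool msix_enabled;                 /* mirrors wx->pdev->msix_enabled */

static enum ret hard_handler(unsigned int pending)
{
	if (msix_enabled)
		return WAKE_THREAD;       /* queues have their own vectors */
	if (!pending)
		return NONE;              /* shared line, not our interrupt */
	printf("schedule NAPI for vector 0\n");
	return WAKE_THREAD;               /* misc work continues in thread */
}

static void thread_fn(void)
{
	printf("handle GPIO/link misc sources\n");
}

int main(void)
{
	if (hard_handler(0x1) == WAKE_THREAD)
		thread_fn();
	return 0;
}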
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
index b77945e7a0f2..e6285b94625e 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
@@ -2,6 +2,6 @@
/* Copyright (c) 2015 - 2024 Beijing WangXun Technology Co., Ltd. */
void txgbe_irq_enable(struct wx *wx, bool queues);
-int txgbe_request_irq(struct wx *wx);
+int txgbe_request_queue_irqs(struct wx *wx);
void txgbe_free_misc_irq(struct txgbe *txgbe);
int txgbe_setup_misc_irq(struct txgbe *txgbe);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 8c7a74981b90..ca74d9422065 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -294,9 +294,9 @@ static int txgbe_open(struct net_device *netdev)
wx_configure(wx);
- err = txgbe_request_irq(wx);
+ err = txgbe_request_queue_irqs(wx);
if (err)
- goto err_free_isb;
+ goto err_free_resources;
/* Notify the stack of the actual queue counts. */
err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
@@ -313,8 +313,8 @@ static int txgbe_open(struct net_device *netdev)
err_free_irq:
wx_free_irq(wx);
-err_free_isb:
- wx_free_isb_resources(wx);
+err_free_resources:
+ wx_free_resources(wx);
err_reset:
txgbe_reset(wx);
@@ -729,6 +729,7 @@ static void txgbe_remove(struct pci_dev *pdev)
txgbe_remove_phy(txgbe);
txgbe_free_misc_irq(txgbe);
+ wx_free_isb_resources(wx);
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index ffe7f463e16e..ef6df0e37bea 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -119,7 +119,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
skb->protocol = eth_type_trans(skb, ndev);
skb->ip_summed = CHECKSUM_NONE;
- if (__netif_rx(skb) == NET_RX_DROP) {
+ if (netif_rx(skb) == NET_RX_DROP) {
ndev->stats.rx_errors++;
ndev->stats.rx_dropped++;
} else {
diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h
index 1c19ae74ad2b..4830b25e6c7d 100644
--- a/drivers/net/phy/aquantia/aquantia.h
+++ b/drivers/net/phy/aquantia/aquantia.h
@@ -6,6 +6,9 @@
* Author: Heiner Kallweit <[email protected]>
*/
+#ifndef AQUANTIA_H
+#define AQUANTIA_H
+
#include <linux/device.h>
#include <linux/phy.h>
@@ -120,3 +123,5 @@ static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; }
#endif
int aqr_firmware_load(struct phy_device *phydev);
+
+#endif /* AQUANTIA_H */
diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c
index 326c9770a6dc..c706429b225a 100644
--- a/drivers/net/phy/dp83tg720.c
+++ b/drivers/net/phy/dp83tg720.c
@@ -17,6 +17,11 @@
#define DP83TG720S_PHY_RESET 0x1f
#define DP83TG720S_HW_RESET BIT(15)
+#define DP83TG720S_LPS_CFG3 0x18c
+/* Power modes are documented as bit fields but used as values */
+/* Power Mode 0 is Normal mode */
+#define DP83TG720S_LPS_CFG3_PWR_MODE_0 BIT(0)
+
#define DP83TG720S_RGMII_DELAY_CTRL 0x602
/* In RGMII mode, Enable or disable the internal delay for RXD */
#define DP83TG720S_RGMII_RX_CLK_SEL BIT(1)
@@ -31,11 +36,20 @@
static int dp83tg720_config_aneg(struct phy_device *phydev)
{
+ int ret;
+
/* Autoneg is not supported and this PHY supports only one speed.
* We need to care only about master/slave configuration if it was
* changed by user.
*/
- return genphy_c45_pma_baset1_setup_master_slave(phydev);
+ ret = genphy_c45_pma_baset1_setup_master_slave(phydev);
+ if (ret)
+ return ret;
+
+ /* Re-read role configuration to make changes visible even if
+ * the link is in administrative down state.
+ */
+ return genphy_c45_pma_baset1_read_master_slave(phydev);
}
static int dp83tg720_read_status(struct phy_device *phydev)
@@ -64,6 +78,8 @@ static int dp83tg720_read_status(struct phy_device *phydev)
return ret;
/* After HW reset we need to restore master/slave configuration.
+ * The genphy_c45_pma_baset1_read_master_slave() call is done
+ * by the dp83tg720_config_aneg() function.
*/
ret = dp83tg720_config_aneg(phydev);
if (ret)
@@ -154,10 +170,24 @@ static int dp83tg720_config_init(struct phy_device *phydev)
*/
usleep_range(1000, 2000);
- if (phy_interface_is_rgmii(phydev))
- return dp83tg720_config_rgmii_delay(phydev);
+ if (phy_interface_is_rgmii(phydev)) {
+ ret = dp83tg720_config_rgmii_delay(phydev);
+ if (ret)
+ return ret;
+ }
+
+ /* In case the PHY is bootstrapped in managed mode, we need to
+ * wake it.
+ */
+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_LPS_CFG3,
+ DP83TG720S_LPS_CFG3_PWR_MODE_0);
+ if (ret)
+ return ret;
- return 0;
+ /* Make role configuration visible for ethtool on init and after
+ * reset.
+ */
+ return genphy_c45_pma_baset1_read_master_slave(phydev);
}
static struct phy_driver dp83tg720_driver[] = {
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 5aada7cf3da7..ebafedde0ab7 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -5607,6 +5607,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = {
{ PHY_ID_KSZ8081, MICREL_PHY_ID_MASK },
{ PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK },
{ PHY_ID_KSZ886X, MICREL_PHY_ID_MASK },
+ { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK },
{ PHY_ID_LAN8814, MICREL_PHY_ID_MASK },
{ PHY_ID_LAN8804, MICREL_PHY_ID_MASK },
{ PHY_ID_LAN8841, MICREL_PHY_ID_MASK },
diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
index a838b61cd844..a35528497a57 100644
--- a/drivers/net/phy/microchip_t1.c
+++ b/drivers/net/phy/microchip_t1.c
@@ -748,7 +748,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev)
ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
lan87xx_cable_test_report_trans(detect));
- return 0;
+ return phy_init_hw(phydev);
}
static int lan87xx_cable_test_get_status(struct phy_device *phydev,
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index b2d36a3a96f1..e5f8ac4b4604 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -107,6 +107,7 @@ struct gpy_priv {
u8 fw_major;
u8 fw_minor;
+ u32 wolopts;
/* It takes 3 seconds to fully switch out of loopback mode before
* it can safely re-enter loopback mode. Record the time when
@@ -221,6 +222,15 @@ static int gpy_hwmon_register(struct phy_device *phydev)
}
#endif
+static int gpy_ack_interrupt(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Clear all pending interrupts */
+ ret = phy_read(phydev, PHY_ISTAT);
+ return ret < 0 ? ret : 0;
+}
+
static int gpy_mbox_read(struct phy_device *phydev, u32 addr)
{
struct gpy_priv *priv = phydev->priv;
@@ -262,16 +272,8 @@ out:
static int gpy_config_init(struct phy_device *phydev)
{
- int ret;
-
- /* Mask all interrupts */
- ret = phy_write(phydev, PHY_IMASK, 0);
- if (ret)
- return ret;
-
- /* Clear all pending interrupts */
- ret = phy_read(phydev, PHY_ISTAT);
- return ret < 0 ? ret : 0;
+ /* Nothing to configure. Configuration Requirement Placeholder */
+ return 0;
}
static int gpy21x_config_init(struct phy_device *phydev)
@@ -627,11 +629,23 @@ static int gpy_read_status(struct phy_device *phydev)
static int gpy_config_intr(struct phy_device *phydev)
{
+ struct gpy_priv *priv = phydev->priv;
u16 mask = 0;
+ int ret;
+
+ ret = gpy_ack_interrupt(phydev);
+ if (ret)
+ return ret;
if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
mask = PHY_IMASK_MASK;
+ if (priv->wolopts & WAKE_MAGIC)
+ mask |= PHY_IMASK_WOL;
+
+ if (priv->wolopts & WAKE_PHY)
+ mask |= PHY_IMASK_LSTC;
+
return phy_write(phydev, PHY_IMASK, mask);
}
@@ -678,6 +692,7 @@ static int gpy_set_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol)
{
struct net_device *attach_dev = phydev->attached_dev;
+ struct gpy_priv *priv = phydev->priv;
int ret;
if (wol->wolopts & WAKE_MAGIC) {
@@ -725,6 +740,8 @@ static int gpy_set_wol(struct phy_device *phydev,
ret = phy_read(phydev, PHY_ISTAT);
if (ret < 0)
return ret;
+
+ priv->wolopts |= WAKE_MAGIC;
} else {
/* Disable magic packet matching */
ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
@@ -732,6 +749,13 @@ static int gpy_set_wol(struct phy_device *phydev,
WOL_EN);
if (ret < 0)
return ret;
+
+ /* Disable the WOL interrupt */
+ ret = phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_WOL);
+ if (ret < 0)
+ return ret;
+
+ priv->wolopts &= ~WAKE_MAGIC;
}
if (wol->wolopts & WAKE_PHY) {
@@ -748,9 +772,11 @@ static int gpy_set_wol(struct phy_device *phydev,
if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC))
phy_trigger_machine(phydev);
+ priv->wolopts |= WAKE_PHY;
return 0;
}
+ priv->wolopts &= ~WAKE_PHY;
/* Disable the link state change interrupt */
return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC);
}
@@ -758,18 +784,10 @@ static int gpy_set_wol(struct phy_device *phydev,
static void gpy_get_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol)
{
- int ret;
+ struct gpy_priv *priv = phydev->priv;
wol->supported = WAKE_MAGIC | WAKE_PHY;
- wol->wolopts = 0;
-
- ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL);
- if (ret & WOL_EN)
- wol->wolopts |= WAKE_MAGIC;
-
- ret = phy_read(phydev, PHY_IMASK);
- if (ret & PHY_IMASK_LSTC)
- wol->wolopts |= WAKE_PHY;
+ wol->wolopts = priv->wolopts;
}
static int gpy_loopback(struct phy_device *phydev, bool enable)
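A sketch of the WoL bookkeeping introduced above: gpy_set_wol() records the
enabled wake modes in driver state, and gpy_get_wol() reports that cached
state instead of re-deriving it from interrupt-mask registers. The WAKE_*
values match linux/ethtool.h; the struct is a stand-in for gpy_priv.

#include <stdint.h>
#include <stdio.h>

#define WAKE_PHY   (1 << 0)
#define WAKE_MAGIC (1 << 5)

struct priv { uint32_t wolopts; };

static void set_wol(struct priv *p, uint32_t opts)
{
	if (opts & WAKE_MAGIC)            /* ...after programming the PHY */
		p->wolopts |= WAKE_MAGIC;
	else
		p->wolopts &= ~WAKE_MAGIC;

	if (opts & WAKE_PHY)
		p->wolopts |= WAKE_PHY;
	else
		p->wolopts &= ~WAKE_PHY;
}

static uint32_t get_wol(const struct priv *p)
{
	return p->wolopts;                /* software state, no MDIO reads */
}

int main(void)
{
	struct priv p = { 0 };

	set_wol(&p, WAKE_MAGIC);
	printf("wolopts=0x%x\n", get_wol(&p));
	return 0;
}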
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 0a65b6d690fe..eb9acfcaeb09 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -70,6 +70,7 @@
#define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */
#define PPP_PROTO_LEN 2
+#define PPP_LCP_HDRLEN 4
/*
* An instance of /dev/ppp can be associated with either a ppp
@@ -493,6 +494,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
return ret;
}
+static bool ppp_check_packet(struct sk_buff *skb, size_t count)
+{
+ /* LCP packets must include the LCP header, which is 4 bytes long:
+ * 1-byte code, 1-byte identifier, and 2-byte length.
+ */
+ return get_unaligned_be16(skb->data) != PPP_LCP ||
+ count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
+}
+
static ssize_t ppp_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -515,6 +525,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
kfree_skb(skb);
goto out;
}
+ ret = -EINVAL;
+ if (unlikely(!ppp_check_packet(skb, count))) {
+ kfree_skb(skb);
+ goto out;
+ }
switch (pf->kind) {
case INTERFACE:
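The new ppp_check_packet() refuses writes that carry the LCP protocol number
but are too short for a full LCP header (code, identifier, 16-bit length).
The same rule as a self-contained sketch; 0xc021 is the standard LCP protocol
number.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PPP_LCP        0xc021
#define PPP_PROTO_LEN  2
#define PPP_LCP_HDRLEN 4                  /* code + id + 16-bit length */

static bool check_packet(const uint8_t *data, size_t count)
{
	uint16_t proto = (uint16_t)(data[0] << 8 | data[1]);

	/* non-LCP frames pass; LCP frames need the full header */
	return proto != PPP_LCP || count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
}

int main(void)
{
	const uint8_t runt[] = { 0xc0, 0x21, 0x01 };
	const uint8_t ok[]   = { 0xc0, 0x21, 0x01, 0x01, 0x00, 0x04 };

	printf("runt: %d, ok: %d\n",
	       check_packet(runt, sizeof(runt)), check_packet(ok, sizeof(ok)));
	return 0;
}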
diff --git a/drivers/net/pse-pd/Kconfig b/drivers/net/pse-pd/Kconfig
index 577ea904b3d9..7fab916a7f46 100644
--- a/drivers/net/pse-pd/Kconfig
+++ b/drivers/net/pse-pd/Kconfig
@@ -23,6 +23,7 @@ config PSE_REGULATOR
config PSE_PD692X0
tristate "PD692X0 PSE controller"
depends on I2C
+ select FW_LOADER
select FW_UPLOAD
help
This module provides support for PD692x0 regulator based Ethernet
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 51c295e1e823..b034ef8a73ea 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -174,7 +174,6 @@ struct ax88179_data {
u32 wol_supported;
u32 wolopts;
u8 disconnecting;
- u8 initialized;
};
struct ax88179_int_data {
@@ -327,7 +326,8 @@ static void ax88179_status(struct usbnet *dev, struct urb *urb)
if (netif_carrier_ok(dev->net) != link) {
usbnet_link_change(dev, link, 1);
- netdev_info(dev->net, "ax88179 - Link status is: %d\n", link);
+ if (!link)
+ netdev_info(dev->net, "ax88179 - Link status is: 0\n");
}
}
@@ -1543,6 +1543,7 @@ static int ax88179_link_reset(struct usbnet *dev)
GMII_PHY_PHYSR, 2, &tmp16);
if (!(tmp16 & GMII_PHY_PHYSR_LINK)) {
+ netdev_info(dev->net, "ax88179 - Link status is: 0\n");
return 0;
} else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ;
@@ -1580,6 +1581,8 @@ static int ax88179_link_reset(struct usbnet *dev)
netif_carrier_on(dev->net);
+ netdev_info(dev->net, "ax88179 - Link status is: 1\n");
+
return 0;
}
@@ -1678,12 +1681,21 @@ static int ax88179_reset(struct usbnet *dev)
static int ax88179_net_reset(struct usbnet *dev)
{
- struct ax88179_data *ax179_data = dev->driver_priv;
+ u16 tmp16;
- if (ax179_data->initialized)
+ ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, GMII_PHY_PHYSR,
+ 2, &tmp16);
+ if (tmp16) {
+ ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
+ 2, 2, &tmp16);
+ if (!(tmp16 & AX_MEDIUM_RECEIVE_EN)) {
+ tmp16 |= AX_MEDIUM_RECEIVE_EN;
+ ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
+ 2, 2, &tmp16);
+ }
+ } else {
ax88179_reset(dev);
- else
- ax179_data->initialized = 1;
+ }
return 0;
}
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 663e46348ce3..386d62769ded 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1372,6 +1372,8 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3000, 0)}, /* Telit FN912 series */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3001, 0)}, /* Telit FN912 series */
{QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 97afd7335d86..01a3b2417a54 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -778,7 +778,8 @@ static int rtl8150_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *ecmd)
{
rtl8150_t *dev = netdev_priv(netdev);
- short lpa, bmcr;
+ short lpa = 0;
+ short bmcr = 0;
u32 supported;
supported = (SUPPORTED_10baseT_Half |
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 61a57d134544..ea10db9a09fa 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1360,6 +1360,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
+ /* Partially checksummed packets must be dropped. */
+ if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+ goto err_xdp;
+
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -1677,6 +1681,10 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
if (unlikely(hdr->hdr.gso_type))
return NULL;
+ /* Partially checksummed packets must be dropped. */
+ if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+ return NULL;
+
/* Now XDP core assumes frag size is PAGE_SIZE, but buffers
* with headroom may add hole in truesize, which
* make their length exceed PAGE_SIZE. So we disabled the
@@ -1943,6 +1951,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
struct net_device *dev = vi->dev;
struct sk_buff *skb;
struct virtio_net_common_hdr *hdr;
+ u8 flags;
if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
@@ -1951,6 +1960,15 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
return;
}
+ /* 1. Save the flags early, as the XDP program might overwrite them.
+ * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
+ * stay valid after XDP processing.
+ * 2. XDP doesn't work with partially checksummed packets (refer to
+ * virtnet_xdp_set()), so packets marked as
+ * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
+ */
+ flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
+
if (vi->mergeable_rx_bufs)
skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
stats);
@@ -1966,7 +1984,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
- if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
+ if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
@@ -5666,8 +5684,16 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
/* (!csum && gso) case will be fixed by register_netdev() */
}
- if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
- dev->features |= NETIF_F_RXCSUM;
+
+ /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
+ * need to calculate checksums for partially checksummed packets,
+ * as they're considered valid by the upper layer.
+ * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
+ * receives fully checksummed packets. The device may assist in
+ * validating these packets' checksums, so the driver won't have to.
+ */
+ dev->features |= NETIF_F_RXCSUM;
+
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
dev->features |= NETIF_F_GRO_HW;
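A toy model of the receive_buf() fix above: the header flags are copied out of
the buffer before it is handed to a processing stage (the XDP program in the
driver) that may rewrite the packet in place. The flag values match the
VIRTIO_NET_HDR_F_* definitions; everything else is illustrative.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define F_NEEDS_CSUM 1
#define F_DATA_VALID 2

static void process(uint8_t *buf, size_t len)
{
	memset(buf, 0, len);              /* stand-in for an XDP program */
}

int main(void)
{
	uint8_t buf[64] = { F_DATA_VALID };
	uint8_t flags = buf[0];           /* save before processing */

	process(buf, sizeof(buf));
	if (flags & F_DATA_VALID)
		printf("checksum already validated\n");
	return 0;
}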
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 567cb3faab70..ba59e92ab941 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2339,7 +2339,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel_key *pkey;
struct ip_tunnel_key key;
struct vxlan_dev *vxlan = netdev_priv(dev);
- const struct iphdr *old_iph = ip_hdr(skb);
+ const struct iphdr *old_iph;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
unsigned int pkt_len = skb->len;
@@ -2353,8 +2353,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
bool use_cache;
bool udp_sum = false;
bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
+ bool no_eth_encap;
__be32 vni = 0;
+ no_eth_encap = flags & VXLAN_F_GPE && skb->protocol != htons(ETH_P_TEB);
+ if (!skb_vlan_inet_prepare(skb, no_eth_encap))
+ goto drop;
+
+ old_iph = ip_hdr(skb);
+
info = skb_tunnel_info(skb);
use_cache = ip_tunnel_dst_cache_usable(skb, info);
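The vxlan change takes the ip_hdr() pointer only after
skb_vlan_inet_prepare(), because preparing the headers may reallocate the
underlying buffer and leave a previously cached pointer dangling. A userspace
toy of that hazard, with all names invented:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pkt { unsigned char *data; size_t len; };

/* may move the buffer, like skb_vlan_inet_prepare() pulling headers */
static int prepare(struct pkt *p, size_t need)
{
	if (p->len < need) {
		unsigned char *n = realloc(p->data, need);

		if (!n)
			return 0;
		memset(n + p->len, 0, need - p->len);
		p->data = n;
		p->len = need;
	}
	return 1;
}

int main(void)
{
	struct pkt p = { .data = malloc(4), .len = 4 };

	if (!p.data || !prepare(&p, 64))
		return 1;
	/* take header pointers only after prepare(); earlier ones are stale */
	unsigned char *iph = p.data;

	printf("header at %p, len %zu\n", (void *)iph, p.len);
	free(p.data);
	return 0;
}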
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 0ba714ca5185..4b8528206cc8 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -15,8 +15,8 @@ static void swap_endian(u8 *dst, const u8 *src, u8 bits)
if (bits == 32) {
*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
} else if (bits == 128) {
- ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
- ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
+ ((u64 *)dst)[0] = get_unaligned_be64(src);
+ ((u64 *)dst)[1] = get_unaligned_be64(src + 8);
}
}
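get_unaligned_be64() replaces a cast-and-dereference that was undefined
behaviour whenever src lacked 8-byte alignment. A userspace rendition of the
same safe big-endian load, assuming a GCC/Clang little-endian build for the
byte swap:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t load_be64(const uint8_t *src)
{
	uint64_t v;

	memcpy(&v, src, sizeof(v));       /* safe at any alignment */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	v = __builtin_bswap64(v);
#endif
	return v;
}

int main(void)
{
	uint8_t buf[17] = { 0 };

	buf[1] = 0x01;                    /* deliberately misaligned source */
	printf("%016llx\n", (unsigned long long)load_be64(buf + 1));
	return 0;
}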
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index 1ea4f874e367..7eb76724b3ed 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -124,10 +124,10 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
*/
static inline int wg_cpumask_next_online(int *last_cpu)
{
- int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+ int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(cpu_online_mask);
- *last_cpu = cpu;
+ WRITE_ONCE(*last_cpu, cpu);
return cpu;
}
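wg_cpumask_next_online() can race with itself on the shared last_cpu slot, so
the fix funnels the accesses through READ_ONCE/WRITE_ONCE. A sketch of the
same round-robin picker using relaxed C11 atomics as the userspace analogue;
the wrap logic is simplified relative to cpumask_next():

#include <stdatomic.h>
#include <stdio.h>

static int next_online(_Atomic int *last, int ncpus)
{
	int cpu = atomic_load_explicit(last, memory_order_relaxed) + 1;

	if (cpu >= ncpus)
		cpu = 0;                  /* wrap like cpumask_first() */
	atomic_store_explicit(last, cpu, memory_order_relaxed);
	return cpu;
}

int main(void)
{
	_Atomic int last = -1;

	for (int i = 0; i < 6; i++)
		printf("%d ", next_online(&last, 4));
	putchar('\n');                    /* 0 1 2 3 0 1 */
	return 0;
}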
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 0d48e0f4a1ba..26e09c30d596 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -222,7 +222,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer)
{
struct sk_buff *skb;
- if (skb_queue_empty(&peer->staged_packet_queue)) {
+ if (skb_queue_empty_lockless(&peer->staged_packet_queue)) {
skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
GFP_ATOMIC);
if (unlikely(!skb))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index de9f0b446545..dac6155ae1bd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -654,7 +654,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES;
if (iwl_fw_lookup_cmd_ver(mvm->fw, WOWLAN_KEK_KCK_MATERIAL,
- IWL_FW_CMD_VER_UNKNOWN) == 3)
+ IWL_FW_CMD_VER_UNKNOWN) >= 3)
hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK;
if (fw_has_api(&mvm->fw->ucode_capa,
@@ -1656,7 +1656,8 @@ static void iwl_mvm_prevent_esr_done_wk(struct wiphy *wiphy,
struct iwl_mvm_vif *mvmvif =
container_of(wk, struct iwl_mvm_vif, prevent_esr_done_wk.work);
struct iwl_mvm *mvm = mvmvif->mvm;
- struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm);
+ struct ieee80211_vif *vif =
+ container_of((void *)mvmvif, struct ieee80211_vif, drv_priv);
mutex_lock(&mvm->mutex);
iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_PREVENTION);
@@ -1682,7 +1683,8 @@ static void iwl_mvm_unblock_esr_tpt(struct wiphy *wiphy, struct wiphy_work *wk)
struct iwl_mvm_vif *mvmvif =
container_of(wk, struct iwl_mvm_vif, unblock_esr_tpt_wk);
struct iwl_mvm *mvm = mvmvif->mvm;
- struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm);
+ struct ieee80211_vif *vif =
+ container_of((void *)mvmvif, struct ieee80211_vif, drv_priv);
mutex_lock(&mvm->mutex);
iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_TPT);
@@ -4795,7 +4797,7 @@ static int iwl_mvm_roc_station(struct iwl_mvm *mvm,
if (fw_ver == IWL_FW_CMD_VER_UNKNOWN) {
ret = iwl_mvm_send_aux_roc_cmd(mvm, channel, vif, duration);
- } else if (fw_ver == 3) {
+ } else if (fw_ver >= 3) {
ret = iwl_mvm_roc_add_cmd(mvm, channel, vif, duration,
ROC_ACTIVITY_HOTSPOT);
} else {
@@ -6410,11 +6412,9 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
if (sync) {
lockdep_assert_held(&mvm->mutex);
ret = wait_event_timeout(mvm->rx_sync_waitq,
- READ_ONCE(mvm->queue_sync_state) == 0 ||
- iwl_mvm_is_radio_hw_killed(mvm),
+ READ_ONCE(mvm->queue_sync_state) == 0,
SYNC_RX_QUEUE_TIMEOUT);
- WARN_ONCE(!ret && !iwl_mvm_is_radio_hw_killed(mvm),
- "queue sync: failed to sync, state is 0x%lx, cookie %d\n",
+ WARN_ONCE(!ret, "queue sync: failed to sync, state is 0x%lx, cookie %d\n",
mvm->queue_sync_state,
mvm->queue_sync_cookie);
}
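Both work handlers now recover the vif from the embedded mvmvif instead of
calling iwl_mvm_get_bss_vif(), which can return an error pointer. That is the
classic container_of pattern; a self-contained demonstration on stand-in
types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct vif {
	int id;
	char drv_priv[32];                /* driver-private area */
};

int main(void)
{
	struct vif v = { .id = 7 };
	void *priv = v.drv_priv;          /* what the driver holds onto */
	struct vif *back = container_of(priv, struct vif, drv_priv);

	printf("recovered vif id=%d\n", back->id);
	return 0;
}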
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 53283d052e18..d343432474db 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -153,7 +153,7 @@ static void iwl_mvm_rx_esr_mode_notif(struct iwl_mvm *mvm,
struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm);
/* FW recommendations is only for entering EMLSR */
- if (!vif || iwl_mvm_vif_from_mac80211(vif)->esr_active)
+ if (IS_ERR_OR_NULL(vif) || iwl_mvm_vif_from_mac80211(vif)->esr_active)
return;
if (le32_to_cpu(notif->action) == ESR_RECOMMEND_ENTER)
@@ -1912,12 +1912,10 @@ static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
bool rfkill_safe_init_done = READ_ONCE(mvm->rfkill_safe_init_done);
bool unified = iwl_mvm_has_unified_ucode(mvm);
- if (state) {
+ if (state)
set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
- wake_up(&mvm->rx_sync_waitq);
- } else {
+ else
clear_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
- }
iwl_mvm_set_rfkill_state(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index 4fa8066a89b6..6e933907f985 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -557,12 +557,10 @@ struct iwl_mvm_stat_data_all_macs {
};
static void iwl_mvm_update_link_sig(struct ieee80211_vif *vif, int sig,
- struct iwl_mvm_vif_link_info *link_info)
+ struct iwl_mvm_vif_link_info *link_info,
+ struct ieee80211_bss_conf *bss_conf)
{
struct iwl_mvm *mvm = iwl_mvm_vif_from_mac80211(vif)->mvm;
- struct ieee80211_bss_conf *bss_conf =
- iwl_mvm_rcu_fw_link_id_to_link_conf(mvm, link_info->fw_link_id,
- false);
int thold = bss_conf->cqm_rssi_thold;
int hyst = bss_conf->cqm_rssi_hyst;
int last_event;
@@ -670,7 +668,7 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac,
mvmvif->deflink.beacon_stats.num_beacons;
/* This is used in pre-MLO API so use deflink */
- iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink);
+ iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf);
}
static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac,
@@ -705,7 +703,7 @@ static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac,
sig = -le32_to_cpu(mac_stats->beacon_filter_average_energy);
/* This is used in pre-MLO API so use deflink */
- iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink);
+ iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf);
}
static inline void
@@ -921,7 +919,8 @@ iwl_mvm_stat_iterator_all_links(struct iwl_mvm *mvm,
mvmvif->link[link_id]->beacon_stats.num_beacons;
sig = -le32_to_cpu(link_stats->beacon_filter_average_energy);
- iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info);
+ iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info,
+ bss_conf);
if (WARN_ONCE(mvmvif->id >= MAC_INDEX_AUX,
"invalid mvmvif id: %d", mvmvif->id))
@@ -967,7 +966,7 @@ static void iwl_mvm_update_esr_mode_tpt(struct iwl_mvm *mvm)
lockdep_assert_held(&mvm->mutex);
- if (!bss_vif)
+ if (IS_ERR_OR_NULL(bss_vif))
return;
mvmvif = iwl_mvm_vif_from_mac80211(bss_vif);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index b5f664ae5a17..e975f5ff17b5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1830,7 +1830,7 @@ iwl_mvm_umac_scan_cfg_channels_v7_6g(struct iwl_mvm *mvm,
*/
if (!iwl_mvm_is_scan_fragmented(params->type)) {
if (!cfg80211_channel_is_psc(params->channels[i]) ||
- flags & IWL_UHB_CHAN_CFG_FLAG_PSC_CHAN_NO_LISTEN) {
+ psc_no_listen) {
if (unsolicited_probe_on_chan) {
max_s_ssids = 2;
max_bssids = 6;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 8ee4498f4245..31bc80cdcb7d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -1238,6 +1238,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (te_data->id >= SESSION_PROTECT_CONF_MAX_ID) {
IWL_DEBUG_TE(mvm,
"No remain on channel event\n");
+ mutex_unlock(&mvm->mutex);
return;
}
@@ -1253,6 +1254,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
te_data = iwl_mvm_get_roc_te(mvm);
if (!te_data) {
IWL_WARN(mvm, "No remain on channel event\n");
+ mutex_unlock(&mvm->mutex);
return;
}
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index f1085ccb7eed..7719e4f3e2a2 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -382,7 +382,8 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
struct ieee80211_p2p_noa_attr noa_attr;
const struct cfg80211_bss_ies *ies;
struct wilc_join_bss_param *param;
- u8 rates_len = 0, ies_len;
+ u8 rates_len = 0;
+ int ies_len;
int ret;
param = kzalloc(sizeof(*param), GFP_KERNEL);
diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c
index a939fd89a7f5..92fc2d456c2c 100644
--- a/drivers/net/wireless/ti/wlcore/cmd.c
+++ b/drivers/net/wireless/ti/wlcore/cmd.c
@@ -1566,13 +1566,6 @@ int wl12xx_cmd_add_peer(struct wl1271 *wl, struct wl12xx_vif *wlvif,
cpu_to_le32(wl1271_tx_enabled_rates_get(wl, sta_rates,
wlvif->band));
- if (!cmd->supported_rates) {
- wl1271_debug(DEBUG_CMD,
- "peer has no supported rates yet, configuring basic rates: 0x%x",
- wlvif->basic_rate_set);
- cmd->supported_rates = cpu_to_le32(wlvif->basic_rate_set);
- }
-
wl1271_debug(DEBUG_CMD, "new peer rates=0x%x queues=0x%x",
cmd->supported_rates, sta->uapsd_queues);
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index ef12169f8044..492cd7aef44f 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5139,19 +5139,23 @@ static int wl12xx_update_sta_state(struct wl1271 *wl,
/* Add station (AP mode) */
if (is_ap &&
- old_state == IEEE80211_STA_NOTEXIST &&
- new_state == IEEE80211_STA_NONE) {
+ old_state == IEEE80211_STA_AUTH &&
+ new_state == IEEE80211_STA_ASSOC) {
ret = wl12xx_sta_add(wl, wlvif, sta);
if (ret)
return ret;
+ wl_sta->fw_added = true;
+
wlcore_update_inconn_sta(wl, wlvif, wl_sta, true);
}
/* Remove station (AP mode) */
if (is_ap &&
- old_state == IEEE80211_STA_NONE &&
- new_state == IEEE80211_STA_NOTEXIST) {
+ old_state == IEEE80211_STA_ASSOC &&
+ new_state == IEEE80211_STA_AUTH) {
+ wl_sta->fw_added = false;
+
/* must not fail */
wl12xx_sta_remove(wl, wlvif, sta);
@@ -5165,11 +5169,6 @@ static int wl12xx_update_sta_state(struct wl1271 *wl,
if (ret < 0)
return ret;
- /* reconfigure rates */
- ret = wl12xx_cmd_add_peer(wl, wlvif, sta, wl_sta->hlid);
- if (ret < 0)
- return ret;
-
ret = wl1271_acx_set_ht_capabilities(wl, &sta->deflink.ht_cap,
true,
wl_sta->hlid);
diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
index 7bd3ce2f0804..464587d16ab2 100644
--- a/drivers/net/wireless/ti/wlcore/tx.c
+++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -140,11 +140,8 @@ EXPORT_SYMBOL(wl12xx_is_dummy_packet);
static u8 wl12xx_tx_get_hlid_ap(struct wl1271 *wl, struct wl12xx_vif *wlvif,
struct sk_buff *skb, struct ieee80211_sta *sta)
{
- if (sta) {
- struct wl1271_station *wl_sta;
-
- wl_sta = (struct wl1271_station *)sta->drv_priv;
- return wl_sta->hlid;
+ if (sta && wl1271_station(sta)->fw_added) {
+ return wl1271_station(sta)->hlid;
} else {
struct ieee80211_hdr *hdr;
diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index eefae3f867b9..817a8a61cac6 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h
@@ -324,6 +324,7 @@ struct wl12xx_rx_filter {
struct wl1271_station {
u8 hlid;
+ bool fw_added;
bool in_connection;
/*
@@ -335,6 +336,11 @@ struct wl1271_station {
u64 total_freed_pkts;
};
+static inline struct wl1271_station *wl1271_station(struct ieee80211_sta *sta)
+{
+ return (struct wl1271_station *)sta->drv_priv;
+}
+
struct wl12xx_vif {
struct wl1271 *wl;
struct list_head list;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 1e5aedaf8c7b..e79c06d65bb7 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1501,9 +1501,15 @@ static int btt_blk_init(struct btt *btt)
.logical_block_size = btt->sector_size,
.max_hw_sectors = UINT_MAX,
.max_integrity_segments = 1,
+ .features = BLK_FEAT_SYNCHRONOUS,
};
int rc;
+ if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
+ lim.integrity.tuple_size = btt_meta_size(btt);
+ lim.integrity.tag_size = btt_meta_size(btt);
+ }
+
btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
if (IS_ERR(btt->btt_disk))
return PTR_ERR(btt->btt_disk);
@@ -1513,17 +1519,6 @@ static int btt_blk_init(struct btt *btt)
btt->btt_disk->fops = &btt_fops;
btt->btt_disk->private_data = btt;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue);
-
- if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
- struct blk_integrity bi = {
- .tuple_size = btt_meta_size(btt),
- .tag_size = btt_meta_size(btt),
- };
- blk_integrity_register(btt->btt_disk, &bi);
- }
-
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
if (rc)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 598fe2e89bda..1dd74c969d5a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -455,6 +455,8 @@ static int pmem_attach_disk(struct device *dev,
.logical_block_size = pmem_sector_size(ndns),
.physical_block_size = PAGE_SIZE,
.max_hw_sectors = UINT_MAX,
+ .features = BLK_FEAT_WRITE_CACHE |
+ BLK_FEAT_SYNCHRONOUS,
};
int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
@@ -463,7 +465,6 @@ static int pmem_attach_disk(struct device *dev,
struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem;
- struct request_queue *q;
struct gendisk *disk;
void *addr;
int rc;
@@ -495,6 +496,10 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0;
}
+ if (fua)
+ lim.features |= BLK_FEAT_FUA;
+ if (is_nd_pfn(dev))
+ lim.features |= BLK_FEAT_DAX;
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) {
@@ -505,7 +510,6 @@ static int pmem_attach_disk(struct device *dev,
disk = blk_alloc_disk(&lim, nid);
if (IS_ERR(disk))
return PTR_ERR(disk);
- q = disk->queue;
pmem->disk = disk;
pmem->pgmap.owner = pmem;
@@ -543,12 +547,6 @@ static int pmem_attach_disk(struct device *dev,
}
pmem->virt_addr = addr;
- blk_queue_write_cache(q, true, fua);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q);
- if (pmem->pfn_flags & PFN_MAP)
- blk_queue_flag_set(QUEUE_FLAG_DAX, q);
-
disk->fops = &pmem_fops;
disk->private_data = pmem;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
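Both nvdimm conversions follow the same shape: every queue property, including
the integrity profile and the feature flags that used to be toggled on a live
queue, is collected in a queue_limits structure before blk_alloc_disk() runs.
A userspace analogue of that configure-then-construct pattern; the field names
are stand-ins for the queue_limits ones:

#include <stdio.h>

struct limits {
	unsigned int block_size;
	unsigned int features;            /* e.g. write cache, FUA, DAX */
};

struct disk { struct limits lim; };

static struct disk *alloc_disk(const struct limits *lim)
{
	static struct disk d;

	d.lim = *lim;                     /* configuration fixed at creation */
	return &d;
}

int main(void)
{
	struct limits lim = { .block_size = 512, .features = 0x3 };
	struct disk *d = alloc_disk(&lim);

	printf("bs=%u features=0x%x\n", d->lim.block_size, d->lim.features);
	return 0;
}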
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index b309c8be720f..a3caef75aa0a 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config NVME_CORE
tristate
- select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
config BLK_DEV_NVME
tristate "NVM Express block device"
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index dd6ec0865141..b1387dc459a3 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1388,7 +1388,7 @@ static void devm_apple_nvme_mempool_destroy(void *data)
mempool_destroy(data);
}
-static int apple_nvme_probe(struct platform_device *pdev)
+static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct apple_nvme *anv;
@@ -1396,7 +1396,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
anv = devm_kzalloc(dev, sizeof(*anv), GFP_KERNEL);
if (!anv)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
anv->dev = get_device(dev);
anv->adminq.is_adminq = true;
@@ -1516,10 +1516,30 @@ static int apple_nvme_probe(struct platform_device *pdev)
goto put_dev;
}
+ return anv;
+put_dev:
+ put_device(anv->dev);
+ return ERR_PTR(ret);
+}
+
+static int apple_nvme_probe(struct platform_device *pdev)
+{
+ struct apple_nvme *anv;
+ int ret;
+
+ anv = apple_nvme_alloc(pdev);
+ if (IS_ERR(anv))
+ return PTR_ERR(anv);
+
+ ret = nvme_add_ctrl(&anv->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
if (IS_ERR(anv->ctrl.admin_q)) {
ret = -ENOMEM;
- goto put_dev;
+ anv->ctrl.admin_q = NULL;
+ goto out_uninit_ctrl;
}
nvme_reset_ctrl(&anv->ctrl);
@@ -1527,8 +1547,10 @@ static int apple_nvme_probe(struct platform_device *pdev)
return 0;
-put_dev:
- put_device(anv->dev);
+out_uninit_ctrl:
+ nvme_uninit_ctrl(&anv->ctrl);
+out_put_ctrl:
+ nvme_put_ctrl(&anv->ctrl);
return ret;
}
@@ -1602,4 +1624,5 @@ static struct platform_driver apple_nvme_driver = {
module_platform_driver(apple_nvme_driver);
MODULE_AUTHOR("Sven Peter <[email protected]>");
+MODULE_DESCRIPTION("Apple ANS NVM Express device driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index 6f2ebb5fcdb0..2b9e6cfaf2a8 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -173,7 +173,7 @@ static const char * const nvme_statuses[] = {
const char *nvme_get_error_status_str(u16 status)
{
- status &= 0x7ff;
+ status &= NVME_SCT_SC_MASK;
if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status])
return nvme_statuses[status];
return "Unknown";
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 782090ce0bc1..19917253ba7b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -110,7 +110,7 @@ struct workqueue_struct *nvme_delete_wq;
EXPORT_SYMBOL_GPL(nvme_delete_wq);
static LIST_HEAD(nvme_subsystems);
-static DEFINE_MUTEX(nvme_subsystems_lock);
+DEFINE_MUTEX(nvme_subsystems_lock);
static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_ctrl_base_chr_devt;
@@ -261,7 +261,7 @@ void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
static blk_status_t nvme_error_status(u16 status)
{
- switch (status & 0x7ff) {
+ switch (status & NVME_SCT_SC_MASK) {
case NVME_SC_SUCCESS:
return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
@@ -307,7 +307,7 @@ static void nvme_retry_req(struct request *req)
u16 crd;
/* The mask and shift result must be <= 3 */
- crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
+ crd = (nvme_req(req)->status & NVME_STATUS_CRD) >> 11;
if (crd)
delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100;
@@ -329,10 +329,10 @@ static void nvme_log_error(struct request *req)
nvme_sect_to_lba(ns->head, blk_rq_pos(req)),
blk_rq_bytes(req) >> ns->head->lba_shift,
nvme_get_error_status_str(nr->status),
- nr->status >> 8 & 7, /* Status Code Type */
- nr->status & 0xff, /* Status Code */
- nr->status & NVME_SC_MORE ? "MORE " : "",
- nr->status & NVME_SC_DNR ? "DNR " : "");
+ NVME_SCT(nr->status), /* Status Code Type */
+ nr->status & NVME_SC_MASK, /* Status Code */
+ nr->status & NVME_STATUS_MORE ? "MORE " : "",
+ nr->status & NVME_STATUS_DNR ? "DNR " : "");
return;
}
@@ -341,10 +341,10 @@ static void nvme_log_error(struct request *req)
nvme_get_admin_opcode_str(nr->cmd->common.opcode),
nr->cmd->common.opcode,
nvme_get_error_status_str(nr->status),
- nr->status >> 8 & 7, /* Status Code Type */
- nr->status & 0xff, /* Status Code */
- nr->status & NVME_SC_MORE ? "MORE " : "",
- nr->status & NVME_SC_DNR ? "DNR " : "");
+ NVME_SCT(nr->status), /* Status Code Type */
+ nr->status & NVME_SC_MASK, /* Status Code */
+ nr->status & NVME_STATUS_MORE ? "MORE " : "",
+ nr->status & NVME_STATUS_DNR ? "DNR " : "");
}
static void nvme_log_err_passthru(struct request *req)
@@ -359,10 +359,10 @@ static void nvme_log_err_passthru(struct request *req)
nvme_get_admin_opcode_str(nr->cmd->common.opcode),
nr->cmd->common.opcode,
nvme_get_error_status_str(nr->status),
- nr->status >> 8 & 7, /* Status Code Type */
- nr->status & 0xff, /* Status Code */
- nr->status & NVME_SC_MORE ? "MORE " : "",
- nr->status & NVME_SC_DNR ? "DNR " : "",
+ NVME_SCT(nr->status), /* Status Code Type */
+ nr->status & NVME_SC_MASK, /* Status Code */
+ nr->status & NVME_STATUS_MORE ? "MORE " : "",
+ nr->status & NVME_STATUS_DNR ? "DNR " : "",
nr->cmd->common.cdw10,
nr->cmd->common.cdw11,
nr->cmd->common.cdw12,
@@ -384,11 +384,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
return COMPLETE;
if (blk_noretry_request(req) ||
- (nvme_req(req)->status & NVME_SC_DNR) ||
+ (nvme_req(req)->status & NVME_STATUS_DNR) ||
nvme_req(req)->retries >= nvme_max_retries)
return COMPLETE;
- if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
+ if ((nvme_req(req)->status & NVME_SCT_SC_MASK) == NVME_SC_AUTH_REQUIRED)
return AUTHENTICATE;
if (req->cmd_flags & REQ_NVME_MPATH) {
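The renames above spell out how the driver decomposes the NVMe status value
(the CQE status field shifted right by one): status code in bits 7:0, status
code type in bits 10:8, CRD in bits 12:11, then the MORE and DNR bits. A small
decoder using the same bit positions; the macro names are local stand-ins for
the kernel's NVME_* definitions:

#include <stdint.h>
#include <stdio.h>

#define SC_MASK   0x00ff                  /* Status Code */
#define SCT_MASK  0x0700                  /* Status Code Type */
#define CRD_MASK  0x1800                  /* Command Retry Delay index */
#define MORE_BIT  0x2000
#define DNR_BIT   0x4000                  /* Do Not Retry */

int main(void)
{
	uint16_t status = 0x4002;         /* e.g. DNR | Invalid Field */

	printf("sct=%d sc=0x%02x crd=%d more=%d dnr=%d\n",
	       (status & SCT_MASK) >> 8, (unsigned int)(status & SC_MASK),
	       (status & CRD_MASK) >> 11,
	       !!(status & MORE_BIT), !!(status & DNR_BIT));
	return 0;
}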
@@ -927,6 +927,36 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
return BLK_STS_OK;
}
+/*
+ * NVMe does not support a dedicated command to issue an atomic write. A write
+ * which does not adhere to the device atomic limits will silently be executed
+ * non-atomically. The request issuer should ensure that the write is within
+ * the queue's atomic write limits, but validate this here in case it is not.
+ */
+static bool nvme_valid_atomic_write(struct request *req)
+{
+ struct request_queue *q = req->q;
+ u32 boundary_bytes = queue_atomic_write_boundary_bytes(q);
+
+ if (blk_rq_bytes(req) > queue_atomic_write_unit_max_bytes(q))
+ return false;
+
+ if (boundary_bytes) {
+ u64 mask = boundary_bytes - 1, imask = ~mask;
+ u64 start = blk_rq_pos(req) << SECTOR_SHIFT;
+ u64 end = start + blk_rq_bytes(req) - 1;
+
+ /* If larger than boundary_bytes, it must be crossing a boundary */
+ if (blk_rq_bytes(req) > boundary_bytes)
+ return false;
+
+ if ((start & imask) != (end & imask))
+ return false;
+ }
+
+ return true;
+}
+
static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
@@ -942,6 +972,9 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+ if (req->cmd_flags & REQ_ATOMIC && !nvme_valid_atomic_write(req))
+ return BLK_STS_INVAL;
+
cmnd->rw.opcode = op;
cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
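nvme_valid_atomic_write() double-checks that a REQ_ATOMIC write fits the
advertised unit size and, when a boundary is reported, does not straddle a
boundary-aligned window; the mask arithmetic works because boundary sizes are
powers of two. The same check as a standalone sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool atomic_write_ok(uint64_t start, uint32_t len,
			    uint32_t unit_max, uint32_t boundary)
{
	if (len > unit_max)
		return false;

	if (boundary) {
		uint64_t mask = (uint64_t)boundary - 1;
		uint64_t end = start + len - 1;

		if (len > boundary)       /* longer than a window: must cross */
			return false;
		if ((start & ~mask) != (end & ~mask))
			return false;     /* start and end in different windows */
	}
	return true;
}

int main(void)
{
	/* 4 KiB write ending at the 64 KiB boundary vs. one crossing it */
	printf("%d %d\n", atomic_write_ok(60 * 1024, 4096, 65536, 65536),
	       atomic_write_ok(63 * 1024, 4096, 65536, 65536));
	return 0;
}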
@@ -1224,7 +1257,7 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
/*
* Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
- *
+ *
* The host should send Keep Alive commands at half of the Keep Alive Timeout
* accounting for transport roundtrip times [..].
*/
@@ -1724,11 +1757,12 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
+static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head,
+ struct queue_limits *lim)
{
- struct blk_integrity integrity = { };
+ struct blk_integrity *bi = &lim->integrity;
- blk_integrity_unregister(disk);
+ memset(bi, 0, sizeof(*bi));
if (!head->ms)
return true;
@@ -1745,17 +1779,16 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
case NVME_NS_DPS_PI_TYPE3:
switch (head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
- integrity.profile = &t10_pi_type3_crc;
- integrity.tag_size = sizeof(u16) + sizeof(u32);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC;
+ bi->tag_size = sizeof(u16) + sizeof(u32);
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
case NVME_NVM_NS_64B_GUARD:
- integrity.profile = &ext_pi_type3_crc64;
- integrity.tag_size = sizeof(u16) + 6;
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC64;
+ bi->tag_size = sizeof(u16) + 6;
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
default:
- integrity.profile = NULL;
break;
}
break;
@@ -1763,28 +1796,27 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
case NVME_NS_DPS_PI_TYPE2:
switch (head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
- integrity.profile = &t10_pi_type1_crc;
- integrity.tag_size = sizeof(u16);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC;
+ bi->tag_size = sizeof(u16);
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE |
+ BLK_INTEGRITY_REF_TAG;
break;
case NVME_NVM_NS_64B_GUARD:
- integrity.profile = &ext_pi_type1_crc64;
- integrity.tag_size = sizeof(u16);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC64;
+ bi->tag_size = sizeof(u16);
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE |
+ BLK_INTEGRITY_REF_TAG;
break;
default:
- integrity.profile = NULL;
break;
}
break;
default:
- integrity.profile = NULL;
break;
}
- integrity.tuple_size = head->ms;
- integrity.pi_offset = head->pi_offset;
- blk_integrity_register(disk, &integrity);
+ bi->tuple_size = head->ms;
+ bi->pi_offset = head->pi_offset;
return true;
}
@@ -1922,6 +1954,23 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
}
}
+
+static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
+ struct nvme_id_ns *id, struct queue_limits *lim,
+ u32 bs, u32 atomic_bs)
+{
+ unsigned int boundary = 0;
+
+ if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
+ if (le16_to_cpu(id->nabspf))
+ boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+ }
+ lim->atomic_write_hw_max = atomic_bs;
+ lim->atomic_write_hw_boundary = boundary;
+ lim->atomic_write_hw_unit_min = bs;
+ lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
+}
+
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
{
return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1;
@@ -1968,13 +2017,16 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+
+ nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
}
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
phys_bs = bs * (1 + le16_to_cpu(id->npwg));
/* NOWS = Namespace Optimal Write Size */
- io_opt = bs * (1 + le16_to_cpu(id->nows));
+ if (id->nows)
+ io_opt = bs * (1 + le16_to_cpu(id->nows));
}
/*
@@ -2058,7 +2110,6 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
- bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
struct nvme_zone_info zi = {};
@@ -2107,11 +2158,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
nvme_update_zone_info(ns, &lim, &zi);
- ret = queue_limits_commit_update(ns->disk->queue, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
+
+ if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
+ else
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
@@ -2119,9 +2170,15 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
* I/O to namespaces with metadata except when the namespace supports
* PI, as it can strip/insert in that case.
*/
- if (!nvme_init_integrity(ns->disk, ns->head))
+ if (!nvme_init_integrity(ns->disk, ns->head, &lim))
capacity = 0;
+ ret = queue_limits_commit_update(ns->disk->queue, &lim);
+ if (ret) {
+ blk_mq_unfreeze_queue(ns->disk->queue);
+ goto out;
+ }
+
set_capacity_and_notify(ns->disk, capacity);
/*
@@ -2133,7 +2190,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
ns->head->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
- blk_queue_write_cache(ns->disk->queue, vwc, vwc);
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2193,14 +2249,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
- if (unsupported)
- ns->head->disk->flags |= GENHD_FL_HIDDEN;
- else
- nvme_init_integrity(ns->head->disk, ns->head);
- set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
- set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
- nvme_mpath_revalidate_paths(ns);
-
/*
* queue_limits mixes values that are the hardware limitations
* for bio splitting with what is the device configuration.
@@ -2223,13 +2271,48 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
lim.io_opt = ns_lim->io_opt;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
+ if (unsupported)
+ ns->head->disk->flags |= GENHD_FL_HIDDEN;
+ else
+ nvme_init_integrity(ns->head->disk, ns->head, &lim);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
+
+ set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
+ set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
+ nvme_mpath_revalidate_paths(ns);
+
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
return ret;
}
+int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
+ enum blk_unique_id type)
+{
+ struct nvme_ns_ids *ids = &ns->head->ids;
+
+ if (type != BLK_UID_EUI64)
+ return -EINVAL;
+
+ if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) {
+ memcpy(id, &ids->nguid, sizeof(ids->nguid));
+ return sizeof(ids->nguid);
+ }
+ if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) {
+ memcpy(id, &ids->eui64, sizeof(ids->eui64));
+ return sizeof(ids->eui64);
+ }
+
+ return -EINVAL;
+}
+
+static int nvme_get_unique_id(struct gendisk *disk, u8 id[16],
+ enum blk_unique_id type)
+{
+ return nvme_ns_get_unique_id(disk->private_data, id, type);
+}
+
#ifdef CONFIG_BLK_SED_OPAL
static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send)
@@ -2285,6 +2368,7 @@ const struct block_device_operations nvme_bdev_ops = {
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
+ .get_unique_id = nvme_get_unique_id,
.report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
@@ -3721,6 +3805,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
{
+ struct queue_limits lim = { };
struct nvme_ns *ns;
struct gendisk *disk;
int node = ctrl->numa_node;
@@ -3729,7 +3814,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (!ns)
return;
- disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns);
+ if (ctrl->opts && ctrl->opts->data_digest)
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+ if (ctrl->ops->supports_pci_p2pdma &&
+ ctrl->ops->supports_pci_p2pdma(ctrl))
+ lim.features |= BLK_FEAT_PCI_P2PDMA;
+
+ disk = blk_mq_alloc_disk(ctrl->tagset, &lim, ns);
if (IS_ERR(disk))
goto out_free_ns;
disk->fops = &nvme_bdev_ops;
@@ -3737,15 +3828,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
ns->disk = disk;
ns->queue = disk->queue;
-
- if (ctrl->opts && ctrl->opts->data_digest)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
- if (ctrl->ops->supports_pci_p2pdma &&
- ctrl->ops->supports_pci_p2pdma(ctrl))
- blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
-
ns->ctrl = ctrl;
kref_init(&ns->kref);
@@ -3887,7 +3969,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
{
- int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ int ret = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
dev_err(ns->ctrl->device,
@@ -3903,7 +3985,7 @@ out:
*
* TODO: we should probably schedule a delayed retry here.
*/
- if (ret > 0 && (ret & NVME_SC_DNR))
+ if (ret > 0 && (ret & NVME_STATUS_DNR))
nvme_ns_remove(ns);
}
@@ -4095,7 +4177,7 @@ static void nvme_scan_work(struct work_struct *work)
* they report) but don't actually support it.
*/
ret = nvme_scan_ns_list(ctrl);
- if (ret > 0 && ret & NVME_SC_DNR)
+ if (ret > 0 && ret & NVME_STATUS_DNR)
nvme_scan_ns_sequential(ctrl);
}
mutex_unlock(&ctrl->scan_lock);
@@ -4489,13 +4571,15 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
return ret;
if (ctrl->ops->flags & NVME_F_FABRICS) {
- ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL);
+ struct queue_limits lim = {
+ .features = BLK_FEAT_SKIP_TAGSET_QUIESCE,
+ };
+
+ ctrl->connect_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->connect_q)) {
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
- blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE,
- ctrl->connect_q);
}
ctrl->tagset = set;
@@ -4613,6 +4697,9 @@ static void nvme_free_ctrl(struct device *dev)
* Initialize a NVMe controller structures. This needs to be called during
* earliest initialization so that we have the initialized structured around
* during probing.
+ *
+ * On success, the caller must use nvme_put_ctrl() to release the controller
+ * when it is no longer needed; this also invokes the ops->free_ctrl() callback.
*/
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks)
@@ -4661,6 +4748,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
goto out;
ctrl->instance = ret;
+ ret = nvme_auth_init_ctrl(ctrl);
+ if (ret)
+ goto out_release_instance;
+
+ nvme_mpath_init_ctrl(ctrl);
+
device_initialize(&ctrl->ctrl_device);
ctrl->device = &ctrl->ctrl_device;
ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
@@ -4673,16 +4766,36 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->device->groups = nvme_dev_attr_groups;
ctrl->device->release = nvme_free_ctrl;
dev_set_drvdata(ctrl->device, ctrl);
+
+ return ret;
+
+out_release_instance:
+ ida_free(&nvme_instance_ida, ctrl->instance);
+out:
+ if (ctrl->discard_page)
+ __free_page(ctrl->discard_page);
+ cleanup_srcu_struct(&ctrl->srcu);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+
+/*
+ * On success, this returns with an elevated controller reference; the caller
+ * must use nvme_uninit_ctrl() to properly free resources associated with the ctrl.
+ */
+int nvme_add_ctrl(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
if (ret)
- goto out_release_instance;
+ return ret;
- nvme_get_ctrl(ctrl);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
- ctrl->cdev.owner = ops->module;
+ ctrl->cdev.owner = ctrl->ops->module;
ret = cdev_device_add(&ctrl->cdev, ctrl->device);
if (ret)
- goto out_free_name;
+ return ret;
/*
* Initialize latency tolerance controls. The sysfs files won't
@@ -4693,28 +4806,11 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
- nvme_mpath_init_ctrl(ctrl);
- ret = nvme_auth_init_ctrl(ctrl);
- if (ret)
- goto out_free_cdev;
+ nvme_get_ctrl(ctrl);
return 0;
-out_free_cdev:
- nvme_fault_inject_fini(&ctrl->fault_inject);
- dev_pm_qos_hide_latency_tolerance(ctrl->device);
- cdev_device_del(&ctrl->cdev, ctrl->device);
-out_free_name:
- nvme_put_ctrl(ctrl);
- kfree_const(ctrl->device->kobj.name);
-out_release_instance:
- ida_free(&nvme_instance_ida, ctrl->instance);
-out:
- if (ctrl->discard_page)
- __free_page(ctrl->discard_page);
- cleanup_srcu_struct(&ctrl->srcu);
- return ret;
}
-EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+EXPORT_SYMBOL_GPL(nvme_add_ctrl);
/* let I/O to all namespaces fail in preparation for surprise removal */
void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
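The series splits controller bring-up in two: nvme_init_ctrl() now only sets
up private state, and the new nvme_add_ctrl() publishes the character device,
so a failure in the second phase unwinds only second-phase state (the reworked
apple.c probe above shows the call order). A userspace outline of that
init/add split, with all names invented:

#include <stdio.h>
#include <stdlib.h>

struct ctrl { char name[16]; int published; };

static struct ctrl *ctrl_init(void)
{
	return calloc(1, sizeof(struct ctrl));    /* private setup only */
}

static int ctrl_add(struct ctrl *c)
{
	snprintf(c->name, sizeof(c->name), "nvme%d", 0);
	c->published = 1;                 /* visible to users from here on */
	return 0;
}

int main(void)
{
	struct ctrl *c = ctrl_init();

	if (!c)
		return 1;
	if (ctrl_add(c)) {                /* on failure, undo only add state */
		free(c);
		return 1;
	}
	printf("%s published\n", c->name);
	free(c);
	return 0;
}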
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index ceb9c0ed3120..44e342a46f39 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -187,7 +187,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
if (unlikely(ret != 0))
dev_err(ctrl->device,
"Property Get error: %d, offset %#x\n",
- ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
+ ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off);
return ret;
}
@@ -233,7 +233,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
if (unlikely(ret != 0))
dev_err(ctrl->device,
"Property Get error: %d, offset %#x\n",
- ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
+ ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off);
return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_read64);
@@ -275,11 +275,26 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
if (unlikely(ret))
dev_err(ctrl->device,
"Property Set error: %d, offset %#x\n",
- ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
+ ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off);
return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_write32);
+int nvmf_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ if (!nvme_wait_reset(ctrl))
+ return -EBUSY;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, NVME_SUBSYS_RESET);
+ if (ret)
+ return ret;
+
+ return nvme_try_sched_reset(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmf_subsystem_reset);
+
/**
* nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
* connect() errors.
@@ -295,7 +310,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
int errval, int offset, struct nvme_command *cmd,
struct nvmf_connect_data *data)
{
- int err_sctype = errval & ~NVME_SC_DNR;
+ int err_sctype = errval & ~NVME_STATUS_DNR;
if (errval < 0) {
dev_err(ctrl->device,
@@ -573,7 +588,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
*/
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
{
- if (status > 0 && (status & NVME_SC_DNR))
+ if (status > 0 && (status & NVME_STATUS_DNR))
return false;
if (status == -EKEYREJECTED)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 602135910ae9..21d75dc4a3a0 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -217,6 +217,7 @@ static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
+int nvmf_subsystem_reset(struct nvme_ctrl *ctrl);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
index 1ba10a5c656d..1d1b6441a339 100644
--- a/drivers/nvme/host/fault_inject.c
+++ b/drivers/nvme/host/fault_inject.c
@@ -75,7 +75,7 @@ void nvme_should_fail(struct request *req)
/* inject status code and DNR bit */
status = fault_inject->status;
if (fault_inject->dont_retry)
- status |= NVME_SC_DNR;
+ status |= NVME_STATUS_DNR;
nvme_req(req)->status = status;
}
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f0b081332749..b81af7919e94 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3132,7 +3132,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
ctrl->ctrl.icdoff);
- ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out_stop_keep_alive;
}
@@ -3140,7 +3140,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) {
dev_err(ctrl->ctrl.device,
"Mandatory sgls are not supported!\n");
- ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out_stop_keep_alive;
}
@@ -3325,7 +3325,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
} else {
if (portptr->port_state == FC_OBJSTATE_ONLINE) {
- if (status > 0 && (status & NVME_SC_DNR))
+ if (status > 0 && (status & NVME_STATUS_DNR))
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: reconnect failure\n",
ctrl->cnum);
@@ -3382,6 +3382,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_fc_free_ctrl,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
@@ -3444,12 +3445,11 @@ nvme_fc_existing_controller(struct nvme_fc_rport *rport,
return found;
}
-static struct nvme_ctrl *
-nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+static struct nvme_fc_ctrl *
+nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
{
struct nvme_fc_ctrl *ctrl;
- unsigned long flags;
int ret, idx, ctrl_loss_tmo;
if (!(rport->remoteport.port_role &
@@ -3538,7 +3538,35 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
if (lport->dev)
ctrl->ctrl.numa_node = dev_to_node(lport->dev);
- /* at this point, teardown path changes to ref counting on nvme ctrl */
+ return ctrl;
+
+out_free_queues:
+ kfree(ctrl->queues);
+out_free_ida:
+ put_device(ctrl->dev);
+ ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
+out_free_ctrl:
+ kfree(ctrl);
+out_fail:
+	/* exiting here doesn't follow the ctrl refcounting path */
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *
+nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+ struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
+{
+ struct nvme_fc_ctrl *ctrl;
+ unsigned long flags;
+ int ret;
+
+ ctrl = nvme_fc_alloc_ctrl(dev, opts, lport, rport);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
&nvme_fc_admin_mq_ops,
@@ -3584,6 +3612,7 @@ fail_ctrl:
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
/* Remove core ctrl ref. */
nvme_put_ctrl(&ctrl->ctrl);
@@ -3597,20 +3626,8 @@ fail_ctrl:
nvme_fc_rport_get(rport);
return ERR_PTR(-EIO);
-
-out_free_queues:
- kfree(ctrl->queues);
-out_free_ida:
- put_device(ctrl->dev);
- ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
-out_free_ctrl:
- kfree(ctrl);
-out_fail:
- /* exit via here doesn't follow ctlr ref points */
- return ERR_PTR(ret);
}
-
struct nvmet_fc_traddr {
u64 nn;
u64 pn;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d8b6b4648eaf..91d9eb3c22ef 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -17,6 +17,7 @@ MODULE_PARM_DESC(multipath,
static const char *nvme_iopolicy_names[] = {
[NVME_IOPOLICY_NUMA] = "numa",
[NVME_IOPOLICY_RR] = "round-robin",
+ [NVME_IOPOLICY_QD] = "queue-depth",
};
static int iopolicy = NVME_IOPOLICY_NUMA;
@@ -29,6 +30,8 @@ static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
iopolicy = NVME_IOPOLICY_NUMA;
else if (!strncmp(val, "round-robin", 11))
iopolicy = NVME_IOPOLICY_RR;
+ else if (!strncmp(val, "queue-depth", 11))
+ iopolicy = NVME_IOPOLICY_QD;
else
return -EINVAL;
@@ -43,7 +46,7 @@ static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
&iopolicy, 0644);
MODULE_PARM_DESC(iopolicy,
- "Default multipath I/O policy; 'numa' (default) or 'round-robin'");
+ "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
@@ -83,7 +86,7 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
- u16 status = nvme_req(req)->status & 0x7ff;
+ u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
unsigned long flags;
struct bio *bio;
@@ -128,6 +131,11 @@ void nvme_mpath_start_request(struct request *rq)
struct nvme_ns *ns = rq->q->queuedata;
struct gendisk *disk = ns->head->disk;
+ if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) {
+ atomic_inc(&ns->ctrl->nr_active);
+ nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE;
+ }
+
if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
return;
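
The counter pairing here is deliberately asymmetric: nvme_mpath_end_request() below drops nr_active with atomic_dec_if_positive(), presumably because the counter is re-zeroed in nvme_mpath_init_identify() across controller resets, so a late completion could otherwise drive it negative. A rough userspace analogue of why the guarded decrement matters (dec_if_positive() only mimics the kernel helper):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int nr_active;

/* mimics the kernel's atomic_dec_if_positive(): decrement only if > 0 */
static int dec_if_positive(atomic_int *v)
{
        int old = atomic_load(v);

        while (old > 0)
                if (atomic_compare_exchange_weak(v, &old, old - 1))
                        break;
        return old - 1;
}

int main(void)
{
        atomic_fetch_add(&nr_active, 1);        /* start_request */
        atomic_store(&nr_active, 0);            /* reset re-initializes it */
        dec_if_positive(&nr_active);            /* late end_request: no underflow */
        printf("nr_active = %d\n", atomic_load(&nr_active));   /* prints 0 */
        return 0;
}
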
@@ -141,6 +149,9 @@ void nvme_mpath_end_request(struct request *rq)
{
struct nvme_ns *ns = rq->q->queuedata;
+ if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)
+ atomic_dec_if_positive(&ns->ctrl->nr_active);
+
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
@@ -291,10 +302,15 @@ static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
}
-static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
- int node, struct nvme_ns *old)
+static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns, *found = NULL;
+ int node = numa_node_id();
+ struct nvme_ns *old = srcu_dereference(head->current_path[node],
+ &head->srcu);
+
+ if (unlikely(!old))
+ return __nvme_find_path(head, node);
if (list_is_singular(&head->list)) {
if (nvme_path_is_disabled(old))
@@ -334,13 +350,49 @@ out:
return found;
}
+static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head)
+{
+ struct nvme_ns *best_opt = NULL, *best_nonopt = NULL, *ns;
+ unsigned int min_depth_opt = UINT_MAX, min_depth_nonopt = UINT_MAX;
+ unsigned int depth;
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (nvme_path_is_disabled(ns))
+ continue;
+
+ depth = atomic_read(&ns->ctrl->nr_active);
+
+ switch (ns->ana_state) {
+ case NVME_ANA_OPTIMIZED:
+ if (depth < min_depth_opt) {
+ min_depth_opt = depth;
+ best_opt = ns;
+ }
+ break;
+ case NVME_ANA_NONOPTIMIZED:
+ if (depth < min_depth_nonopt) {
+ min_depth_nonopt = depth;
+ best_nonopt = ns;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (min_depth_opt == 0)
+ return best_opt;
+ }
+
+ return best_opt ? best_opt : best_nonopt;
+}
+
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
ns->ana_state == NVME_ANA_OPTIMIZED;
}
-inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head)
{
int node = numa_node_id();
struct nvme_ns *ns;
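
Distilled to userspace, the selection loop above is a min-scan with an early exit once an idle optimized path is found; ANA-non-optimized paths only win when no optimized path exists at all. A self-contained sketch (path names are made up):

#include <stdio.h>

struct path { const char *name; int optimized; unsigned int depth; };

static const struct path *queue_depth_path(const struct path *p, int n)
{
        const struct path *best_opt = NULL, *best_nonopt = NULL;
        unsigned int min_opt = ~0u, min_nonopt = ~0u;

        for (int i = 0; i < n; i++) {
                if (p[i].optimized && p[i].depth < min_opt) {
                        min_opt = p[i].depth;
                        best_opt = &p[i];
                } else if (!p[i].optimized && p[i].depth < min_nonopt) {
                        min_nonopt = p[i].depth;
                        best_nonopt = &p[i];
                }
                if (min_opt == 0)
                        break;  /* an idle optimized path cannot be beaten */
        }
        return best_opt ? best_opt : best_nonopt;
}

int main(void)
{
        const struct path paths[] = {
                { "pathA", 1, 4 }, { "pathB", 1, 1 }, { "pathC", 0, 0 },
        };

        /* picks pathB: least-busy optimized path, even though pathC is idle */
        printf("%s\n", queue_depth_path(paths, 3)->name);
        return 0;
}
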
@@ -348,14 +400,23 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
ns = srcu_dereference(head->current_path[node], &head->srcu);
if (unlikely(!ns))
return __nvme_find_path(head, node);
-
- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR)
- return nvme_round_robin_path(head, node, ns);
if (unlikely(!nvme_path_is_optimized(ns)))
return __nvme_find_path(head, node);
return ns;
}
+inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+{
+ switch (READ_ONCE(head->subsys->iopolicy)) {
+ case NVME_IOPOLICY_QD:
+ return nvme_queue_depth_path(head);
+ case NVME_IOPOLICY_RR:
+ return nvme_round_robin_path(head);
+ default:
+ return nvme_numa_path(head);
+ }
+}
+
static bool nvme_available_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns;
@@ -427,6 +488,21 @@ static void nvme_ns_head_release(struct gendisk *disk)
nvme_put_ns_head(disk->private_data);
}
+static int nvme_ns_head_get_unique_id(struct gendisk *disk, u8 id[16],
+ enum blk_unique_id type)
+{
+ struct nvme_ns_head *head = disk->private_data;
+ struct nvme_ns *ns;
+ int srcu_idx, ret = -EWOULDBLOCK;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (ns)
+ ret = nvme_ns_get_unique_id(ns, id, type);
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
#ifdef CONFIG_BLK_DEV_ZONED
static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
@@ -454,6 +530,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.ioctl = nvme_ns_head_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.getgeo = nvme_getgeo,
+ .get_unique_id = nvme_ns_head_get_unique_id,
.report_zones = nvme_ns_head_report_zones,
.pr_ops = &nvme_pr_ops,
};
@@ -521,7 +598,6 @@ static void nvme_requeue_work(struct work_struct *work)
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
struct queue_limits lim;
- bool vwc = false;
mutex_init(&head->lock);
bio_list_init(&head->requeue_list);
@@ -539,6 +615,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
blk_set_stacking_limits(&lim);
lim.dma_alignment = 3;
+ lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
if (head->ids.csi != NVME_CSI_ZNS)
lim.max_zone_append_sectors = 0;
@@ -549,24 +626,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
head->disk->private_data = head;
sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue);
- /*
- * This assumes all controllers that refer to a namespace either
- * support poll queues or not. That is not a strict guarantee,
- * but if the assumption is wrong the effect is only suboptimal
- * performance but not correctness problem.
- */
- if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL &&
- ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
- blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
-
- /* we need to propagate up the VMC settings */
- if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
- vwc = true;
- blk_queue_write_cache(head->disk->queue, vwc, vwc);
return 0;
}
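
The removed calls show the direction of this series: instead of toggling queue flags and the write cache after the disk exists, the multipath node declares its features in the queue_limits before allocation, and limits stacking then propagates properties such as the write cache up from the bottom devices. Roughly this shape (a sketch; it assumes the code just outside this hunk allocates the disk with blk_alloc_disk(&lim, ...)):

        struct queue_limits lim;

        blk_set_stacking_limits(&lim);
        lim.dma_alignment = 3;
        lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;

        head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
        if (IS_ERR(head->disk))
                return PTR_ERR(head->disk);
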
@@ -803,6 +862,29 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
}
+static void nvme_subsys_iopolicy_update(struct nvme_subsystem *subsys,
+ int iopolicy)
+{
+ struct nvme_ctrl *ctrl;
+ int old_iopolicy = READ_ONCE(subsys->iopolicy);
+
+ if (old_iopolicy == iopolicy)
+ return;
+
+ WRITE_ONCE(subsys->iopolicy, iopolicy);
+
+	/* iopolicy changes invalidate the cached current paths by design */
+ mutex_lock(&nvme_subsystems_lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ nvme_mpath_clear_ctrl_paths(ctrl);
+ mutex_unlock(&nvme_subsystems_lock);
+
+ pr_notice("subsysnqn %s iopolicy changed from %s to %s\n",
+ subsys->subnqn,
+ nvme_iopolicy_names[old_iopolicy],
+ nvme_iopolicy_names[iopolicy]);
+}
+
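
With the update hook wired into the store method below, the policy can be switched at runtime per subsystem through sysfs. A small userspace sketch (the nvme-subsys0 path is illustrative; substitute the actual subsystem instance):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *attr = "/sys/class/nvme-subsystem/nvme-subsys0/iopolicy";
        int fd = open(attr, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* accepted values: "numa", "round-robin", "queue-depth" */
        if (write(fd, "queue-depth", strlen("queue-depth")) < 0)
                perror("write");
        close(fd);
        return 0;
}
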
static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
@@ -812,7 +894,7 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
- WRITE_ONCE(subsys->iopolicy, i);
+ nvme_subsys_iopolicy_update(subsys, i);
return count;
}
}
@@ -875,9 +957,6 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
nvme_mpath_set_live(ns);
}
- if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
- ns->head->disk->queue);
#ifdef CONFIG_BLK_DEV_ZONED
if (blk_queue_is_zoned(ns->queue) && ns->head->disk)
ns->head->disk->nr_zones = ns->disk->nr_zones;
@@ -923,6 +1002,9 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
!(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
+ /* initialize this in the identify path to cover controller resets */
+ atomic_set(&ctrl->nr_active, 0);
+
if (!ctrl->max_namespaces ||
ctrl->max_namespaces > le32_to_cpu(id->nn)) {
dev_err(ctrl->device,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f3a41133ac3f..f900e44243ae 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -49,6 +49,7 @@ extern unsigned int admin_timeout;
extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
+extern struct mutex nvme_subsystems_lock;
/*
* List of workarounds for devices that required behavior not specified in
@@ -195,6 +196,7 @@ enum {
NVME_REQ_CANCELLED = (1 << 0),
NVME_REQ_USERCMD = (1 << 1),
NVME_MPATH_IO_STATS = (1 << 2),
+ NVME_MPATH_CNT_ACTIVE = (1 << 3),
};
static inline struct nvme_request *nvme_req(struct request *req)
@@ -360,6 +362,7 @@ struct nvme_ctrl {
size_t ana_log_size;
struct timer_list anatt_timer;
struct work_struct ana_work;
+ atomic_t nr_active;
#endif
#ifdef CONFIG_NVME_HOST_AUTH
@@ -408,6 +411,7 @@ static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,
+ NVME_IOPOLICY_QD,
};
struct nvme_subsystem {
@@ -502,7 +506,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
- NVME_NS_DEAC, /* DEAC bit in Write Zeores supported */
+	NVME_NS_DEAC = 1 << 2, /* DEAC bit in Write Zeroes supported */
};
struct nvme_ns {
@@ -551,6 +555,7 @@ struct nvme_ctrl_ops {
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
+ int (*subsystem_reset)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
@@ -649,18 +654,9 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
- int ret;
-
- if (!ctrl->subsystem)
+ if (!ctrl->subsystem || !ctrl->ops->subsystem_reset)
return -ENOTTY;
- if (!nvme_wait_reset(ctrl))
- return -EBUSY;
-
- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
- if (ret)
- return ret;
-
- return nvme_try_sched_reset(ctrl);
+ return ctrl->ops->subsystem_reset(ctrl);
}
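
The open-coded 0x4E564D65 that disappears here is the NSSR magic the NVMe spec defines: the ASCII bytes "NVMe". A two-line check of that reading (assuming NVME_SUBSYS_RESET keeps the same value under the new name):

#include <assert.h>

int main(void)
{
        unsigned int nssr = ('N' << 24) | ('V' << 16) | ('M' << 8) | 'e';

        assert(nssr == 0x4E564D65);     /* the value NVME_SUBSYS_RESET names */
        return 0;
}
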
/*
@@ -689,7 +685,7 @@ static inline u32 nvme_bytes_to_numd(size_t len)
static inline bool nvme_is_ana_error(u16 status)
{
- switch (status & 0x7ff) {
+ switch (status & NVME_SCT_SC_MASK) {
case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE:
case NVME_SC_ANA_PERSISTENT_LOSS:
@@ -702,7 +698,7 @@ static inline bool nvme_is_ana_error(u16 status)
static inline bool nvme_is_path_error(u16 status)
{
/* check for a status code type of 'path related status' */
- return (status & 0x700) == 0x300;
+ return (status & NVME_SCT_MASK) == NVME_SCT_PATH;
}
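
The literals retired in these two helpers describe the driver-side status word (the CQE status field shifted right by one): the low 8 bits hold the status code, bits 8-10 the status code type, and DNR sits higher up. A sketch using the values implied by the replaced literals (NVME_STATUS_DNR as 0x4000 is the historical kernel value and an assumption here):

#include <stdio.h>

enum {
        NVME_SCT_SC_MASK = 0x7ff,       /* status code type + status code */
        NVME_SCT_MASK    = 0x700,       /* status code type only */
        NVME_SCT_PATH    = 0x300,       /* "path related status" type */
        NVME_STATUS_DNR  = 0x4000,      /* do not retry (assumed value) */
};

static int is_path_error(unsigned short status)
{
        return (status & NVME_SCT_MASK) == NVME_SCT_PATH;
}

int main(void)
{
        unsigned short st = 0x301 | NVME_STATUS_DNR;    /* hypothetical status */

        printf("path=%d dnr=%d sc+sct=%#x\n", is_path_error(st),
               !!(st & NVME_STATUS_DNR), st & NVME_SCT_SC_MASK);
        return 0;
}
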
/*
@@ -792,6 +788,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks);
+int nvme_add_ctrl(struct nvme_ctrl *ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
@@ -877,7 +874,7 @@ enum {
NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1),
/* Set BLK_MQ_REQ_RESERVED when allocating request */
NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2),
- /* Retry command when NVME_SC_DNR is not set in the result */
+ /* Retry command when NVME_STATUS_DNR is not set in the result */
NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3),
};
@@ -1062,6 +1059,9 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */
+int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
+ enum blk_unique_id type);
+
struct nvme_zone_info {
u64 zone_size;
unsigned int max_open_zones;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65f..21f8e1b9801f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -826,9 +826,9 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct bio_vec bv = rq_integrity_vec(req);
- iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
- rq_dma_dir(req), 0);
+ iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ rq_integrity_vec(req).bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))
@@ -1143,6 +1143,41 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
spin_unlock(&nvmeq->sq_lock);
}
+static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+ int ret = 0;
+
+ /*
+ * Taking the shutdown_lock ensures the BAR mapping is not being
+ * altered by reset_work. Holding this lock before the RESETTING state
+ * change, if successful, also ensures nvme_remove won't be able to
+ * proceed to iounmap until we're done.
+ */
+ mutex_lock(&dev->shutdown_lock);
+ if (!dev->bar_mapped_size) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ writel(NVME_SUBSYS_RESET, dev->bar + NVME_REG_NSSR);
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE);
+
+ /*
+ * Read controller status to flush the previous write and trigger a
+	 * PCIe read error.
+ */
+ readl(dev->bar + NVME_REG_CSTS);
+unlock:
+ mutex_unlock(&dev->shutdown_lock);
+ return ret;
+}
+
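
The readl() of CSTS after the NSSR write is the usual idiom for flushing a posted PCIe write: reads are non-posted, so they force the preceding write out to the device before continuing. A userspace-flavoured illustration of the shape (register offsets per the NVMe spec; the fake BAR obviously resets nothing):

#include <stdint.h>

#define NVME_REG_CSTS   0x1c    /* controller status */
#define NVME_REG_NSSR   0x20    /* subsystem reset register */

static inline void writel(uint32_t v, volatile uint32_t *addr) { *addr = v; }
static inline uint32_t readl(volatile uint32_t *addr) { return *addr; }

static uint32_t fake_bar[0x40 / 4];

int main(void)
{
        volatile uint32_t *bar = fake_bar;

        writel(0x4E564D65, bar + NVME_REG_NSSR / 4);    /* post the reset write */
        (void)readl(bar + NVME_REG_CSTS / 4);           /* non-posted read flushes it */
        return 0;
}
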
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
struct nvme_command c = { };
@@ -2859,6 +2894,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read64 = nvme_pci_reg_read64,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
+ .subsystem_reset = nvme_pci_subsystem_reset,
.get_address = nvme_pci_get_address,
.print_device_info = nvme_pci_print_device_info,
.supports_pci_p2pdma = nvme_pci_supports_pci_p2pdma,
@@ -3015,6 +3051,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(dev))
return PTR_ERR(dev);
+ result = nvme_add_ctrl(&dev->ctrl);
+ if (result)
+ goto out_put_ctrl;
+
result = nvme_dev_map(dev);
if (result)
goto out_uninit_ctrl;
@@ -3101,6 +3141,7 @@ out_dev_unmap:
nvme_dev_unmap(dev);
out_uninit_ctrl:
nvme_uninit_ctrl(&dev->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&dev->ctrl);
return result;
}
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 8fa1ffcdaed4..7347ddf85f00 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -72,12 +72,12 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
return nvme_submit_sync_cmd(ns->queue, c, data, data_len);
}
-static int nvme_sc_to_pr_err(int nvme_sc)
+static int nvme_status_to_pr_err(int status)
{
- if (nvme_is_path_error(nvme_sc))
+ if (nvme_is_path_error(status))
return PR_STS_PATH_FAILED;
- switch (nvme_sc & 0x7ff) {
+ switch (status & NVME_SCT_SC_MASK) {
case NVME_SC_SUCCESS:
return PR_STS_SUCCESS;
case NVME_SC_RESERVATION_CONFLICT:
@@ -121,7 +121,7 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
if (ret < 0)
return ret;
- return nvme_sc_to_pr_err(ret);
+ return nvme_status_to_pr_err(ret);
}
static int nvme_pr_register(struct block_device *bdev, u64 old,
@@ -196,7 +196,7 @@ retry:
if (ret < 0)
return ret;
- return nvme_sc_to_pr_err(ret);
+ return nvme_status_to_pr_err(ret);
}
static int nvme_pr_read_keys(struct block_device *bdev,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 51a62b0c645a..2eb33842f971 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2201,6 +2201,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
@@ -2237,12 +2238,11 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
-static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+static struct nvme_rdma_ctrl *nvme_rdma_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_rdma_ctrl *ctrl;
int ret;
- bool changed;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
@@ -2304,6 +2304,30 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ return ctrl;
+
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_free_ctrl:
+ kfree(ctrl);
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+ struct nvmf_ctrl_options *opts)
+{
+ struct nvme_rdma_ctrl *ctrl;
+ bool changed;
+ int ret;
+
+ ctrl = nvme_rdma_alloc_ctrl(dev, opts);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
WARN_ON_ONCE(!changed);
@@ -2322,15 +2346,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
-out_kfree_queues:
- kfree(ctrl->queues);
-out_free_ctrl:
- kfree(ctrl);
- return ERR_PTR(ret);
}
static struct nvmf_transport_ops nvme_rdma_transport = {
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8b5e4327fe83..a2a47d3ab99f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2662,6 +2662,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_tcp_free_ctrl,
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,
@@ -2686,7 +2687,7 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
-static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_tcp_ctrl *ctrl;
@@ -2761,6 +2762,28 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ return ctrl;
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_free_ctrl:
+ kfree(ctrl);
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+ struct nvmf_ctrl_options *opts)
+{
+ struct nvme_tcp_ctrl *ctrl;
+ int ret;
+
+ ctrl = nvme_tcp_alloc_ctrl(dev, opts);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
WARN_ON_ONCE(1);
ret = -EINTR;
@@ -2782,15 +2805,11 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
-out_kfree_queues:
- kfree(ctrl->queues);
-out_free_ctrl:
- kfree(ctrl);
- return ERR_PTR(ret);
}
static struct nvmf_transport_ops nvme_tcp_transport = {
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 77aa0f440a6d..9a06f9d98cd6 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -108,13 +108,12 @@ free_data:
void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
struct nvme_zone_info *zi)
{
- lim->zoned = 1;
+ lim->features |= BLK_FEAT_ZONED;
lim->max_open_zones = zi->max_open_zones;
lim->max_active_zones = zi->max_active_zones;
lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
lim->chunk_sectors = ns->head->zsze =
nvme_lba_to_sect(ns->head, zi->zone_size);
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
}
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 872dd1a0acd8..46be031f91b4 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -6,7 +6,6 @@ config NVME_TARGET
depends on CONFIGFS_FS
select NVME_KEYRING if NVME_TARGET_TCP_TLS
select KEYS if NVME_TARGET_TCP_TLS
- select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
	  This enables target side support for the NVMe protocol, that is
@@ -18,6 +17,15 @@ config NVME_TARGET
To configure the NVMe target you probably want to use the nvmetcli
tool from http://git.infradead.org/users/hch/nvmetcli.git.
+config NVME_TARGET_DEBUGFS
+ bool "NVMe Target debugfs support"
+ depends on NVME_TARGET
+ help
+	  This enables debugfs support to display the controllers connected
+	  to each subsystem.
+
+ If unsure, say N.
+
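
Since the new option is a bool hanging off the tristate NVME_TARGET, the debugfs code is built into nvmet itself rather than as a separate module. A typical fragment to enable it (debugfs must also be mounted at runtime, normally at /sys/kernel/debug):

CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_DEBUGFS=y
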
config NVME_TARGET_PASSTHRU
bool "NVMe Target Passthrough support"
depends on NVME_TARGET
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index c66820102493..c402c44350b2 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
+nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvmet-$(CONFIG_NVME_TARGET_AUTH) += fabrics-cmd-auth.o auth.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index f5b7054a4a05..f7e1156ac7ec 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -344,7 +344,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
pr_debug("unhandled lid %d on qid %d\n",
req->cmd->get_log_page.lid, req->sq->qid);
req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
- nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_STATUS_DNR);
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
@@ -496,7 +496,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
req->error_loc = offsetof(struct nvme_identify, nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
goto out;
}
@@ -662,7 +662,7 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req)
if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
off) != NVME_IDENTIFY_DATA_SIZE - off)
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
out:
nvmet_req_complete(req, status);
@@ -724,7 +724,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
pr_debug("unhandled identify cns %d on qid %d\n",
req->cmd->identify.cns, req->sq->qid);
req->error_loc = offsetof(struct nvme_identify, cns);
- nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_STATUS_DNR);
}
/*
@@ -807,7 +807,7 @@ u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask)
if (val32 & ~mask) {
req->error_loc = offsetof(struct nvme_common_command, cdw11);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
@@ -833,7 +833,7 @@ void nvmet_execute_set_features(struct nvmet_req *req)
ncqr = (cdw11 >> 16) & 0xffff;
nsqr = cdw11 & 0xffff;
if (ncqr == 0xffff || nsqr == 0xffff) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
nvmet_set_result(req,
@@ -846,14 +846,14 @@ void nvmet_execute_set_features(struct nvmet_req *req)
status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL);
break;
case NVME_FEAT_HOST_ID:
- status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ status = NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
break;
case NVME_FEAT_WRITE_PROTECT:
status = nvmet_set_feat_write_protect(req);
break;
default:
req->error_loc = offsetof(struct nvme_common_command, cdw10);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -939,7 +939,7 @@ void nvmet_execute_get_features(struct nvmet_req *req)
if (!(req->cmd->common.cdw11 & cpu_to_le32(1 << 0))) {
req->error_loc =
offsetof(struct nvme_common_command, cdw11);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -952,7 +952,7 @@ void nvmet_execute_get_features(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -969,7 +969,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
mutex_lock(&ctrl->lock);
if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_STATUS_DNR);
return;
}
ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
@@ -1006,7 +1006,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
if (nvme_is_fabrics(cmd))
return nvmet_parse_fabrics_admin_cmd(req);
if (unlikely(!nvmet_check_auth_status(req)))
- return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+ return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
if (nvmet_is_disc_subsys(nvmet_req_subsys(req)))
return nvmet_parse_discovery_cmd(req);
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 7d2633940f9b..8bc3f431c77f 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -314,7 +314,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
req->sq->dhchap_c1,
challenge, shash_len);
if (ret)
- goto out_free_response;
+ goto out_free_challenge;
}
pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
@@ -325,7 +325,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
GFP_KERNEL);
if (!shash) {
ret = -ENOMEM;
- goto out_free_response;
+ goto out_free_challenge;
}
shash->tfm = shash_tfm;
ret = crypto_shash_init(shash);
@@ -361,9 +361,10 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
goto out;
ret = crypto_shash_final(shash, response);
out:
+ kfree(shash);
+out_free_challenge:
if (challenge != req->sq->dhchap_c1)
kfree(challenge);
- kfree(shash);
out_free_response:
nvme_auth_free_key(transformed_key);
out_free_tfm:
@@ -427,14 +428,14 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
req->sq->dhchap_c2,
challenge, shash_len);
if (ret)
- goto out_free_response;
+ goto out_free_challenge;
}
shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
GFP_KERNEL);
if (!shash) {
ret = -ENOMEM;
- goto out_free_response;
+ goto out_free_challenge;
}
shash->tfm = shash_tfm;
@@ -471,9 +472,10 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
goto out;
ret = crypto_shash_final(shash, response);
out:
+ kfree(shash);
+out_free_challenge:
if (challenge != req->sq->dhchap_c2)
kfree(challenge);
- kfree(shash);
out_free_response:
nvme_auth_free_key(transformed_key);
out_free_tfm:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index bd87dfd173a4..685e89b35d33 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -410,7 +410,29 @@ static ssize_t nvmet_addr_tsas_show(struct config_item *item,
return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name);
}
}
- return sprintf(page, "reserved\n");
+ return sprintf(page, "\n");
+}
+
+static u8 nvmet_addr_tsas_rdma_store(const char *page)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) {
+ if (sysfs_streq(page, nvmet_addr_tsas_rdma[i].name))
+ return nvmet_addr_tsas_rdma[i].type;
+ }
+ return NVMF_RDMA_QPTYPE_INVALID;
+}
+
+static u8 nvmet_addr_tsas_tcp_store(const char *page)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
+ if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name))
+ return nvmet_addr_tsas_tcp[i].type;
+ }
+ return NVMF_TCP_SECTYPE_INVALID;
}
static ssize_t nvmet_addr_tsas_store(struct config_item *item,
@@ -418,20 +440,19 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
u8 treq = nvmet_port_disc_addr_treq_mask(port);
- u8 sectype;
- int i;
+ u8 sectype, qptype;
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- if (port->disc_addr.trtype != NVMF_TRTYPE_TCP)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
- if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) {
- sectype = nvmet_addr_tsas_tcp[i].type;
+ if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) {
+ qptype = nvmet_addr_tsas_rdma_store(page);
+ if (qptype == port->disc_addr.tsas.rdma.qptype)
+ return count;
+ } else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) {
+ sectype = nvmet_addr_tsas_tcp_store(page);
+ if (sectype != NVMF_TCP_SECTYPE_INVALID)
goto found;
- }
}
pr_err("Invalid value '%s' for tsas\n", page);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 4ff460ba2826..ed2424f8a396 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -16,6 +16,7 @@
#include "trace.h"
#include "nvmet.h"
+#include "debugfs.h"
struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
@@ -55,18 +56,18 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
return NVME_SC_SUCCESS;
case -ENOSPC:
req->error_loc = offsetof(struct nvme_rw_command, length);
- return NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
case -EREMOTEIO:
req->error_loc = offsetof(struct nvme_rw_command, slba);
- return NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
case -EOPNOTSUPP:
req->error_loc = offsetof(struct nvme_common_command, opcode);
switch (req->cmd->common.opcode) {
case nvme_cmd_dsm:
case nvme_cmd_write_zeroes:
- return NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
default:
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
break;
case -ENODATA:
@@ -76,7 +77,7 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
fallthrough;
default:
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INTERNAL | NVME_SC_DNR;
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
}
}
@@ -86,7 +87,7 @@ u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
req->sq->qid);
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
@@ -97,7 +98,7 @@ u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
{
if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
}
return 0;
}
@@ -106,7 +107,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
}
return 0;
}
@@ -115,7 +116,7 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
}
return 0;
}
@@ -145,7 +146,7 @@ static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
@@ -444,7 +445,7 @@ u16 nvmet_req_find_ns(struct nvmet_req *req)
req->error_loc = offsetof(struct nvme_common_command, nsid);
if (nvmet_subsys_nsid_exists(subsys, nsid))
return NVME_SC_INTERNAL_PATH_ERROR;
- return NVME_SC_INVALID_NS | NVME_SC_DNR;
+ return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
}
percpu_ref_get(&req->ns->ref);
@@ -904,7 +905,7 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return nvmet_parse_fabrics_io_cmd(req);
if (unlikely(!nvmet_check_auth_status(req)))
- return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+ return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
ret = nvmet_check_ctrl_status(req);
if (unlikely(ret))
@@ -967,7 +968,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
req->error_loc = offsetof(struct nvme_common_command, flags);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -978,7 +979,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
*/
if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
req->error_loc = offsetof(struct nvme_common_command, flags);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -996,7 +997,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
trace_nvmet_req_init(req, req->cmd);
if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -1023,7 +1024,7 @@ bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR);
return false;
}
@@ -1035,7 +1036,7 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
if (unlikely(data_len > req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR);
return false;
}
@@ -1304,18 +1305,18 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req)
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
req->cmd->common.opcode, req->sq->qid);
- return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
req->cmd->common.opcode, req->sq->qid);
- return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
}
if (unlikely(!nvmet_check_auth_status(req))) {
pr_warn("qid %d not authenticated\n", req->sq->qid);
- return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+ return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
}
return 0;
}
@@ -1389,7 +1390,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
int ret;
u16 status;
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
subsys = nvmet_find_get_subsys(req->port, subsysnqn);
if (!subsys) {
pr_warn("connect request for invalid subsystem %s!\n",
@@ -1405,7 +1406,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
hostnqn, subsysnqn);
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
up_read(&nvmet_config_sem);
- status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_common_command, dptr);
goto out_put_subsystem;
}
@@ -1456,7 +1457,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
- status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
goto out_free_sqs;
}
ctrl->cntlid = ret;
@@ -1479,6 +1480,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
mutex_lock(&subsys->lock);
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
nvmet_setup_p2p_ns_map(ctrl, req);
+ nvmet_debugfs_ctrl_setup(ctrl);
mutex_unlock(&subsys->lock);
*ctrlp = ctrl;
@@ -1513,6 +1515,8 @@ static void nvmet_ctrl_free(struct kref *ref)
nvmet_destroy_auth(ctrl);
+ nvmet_debugfs_ctrl_free(ctrl);
+
ida_free(&cntlid_ida, ctrl->cntlid);
nvmet_async_events_free(ctrl);
@@ -1539,6 +1543,14 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
+ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ if (!ctrl->ops->host_traddr)
+ return -EOPNOTSUPP;
+ return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
+}
+
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn)
{
@@ -1633,8 +1645,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
INIT_LIST_HEAD(&subsys->ctrls);
INIT_LIST_HEAD(&subsys->hosts);
+ ret = nvmet_debugfs_subsys_setup(subsys);
+ if (ret)
+ goto free_subsysnqn;
+
return subsys;
+free_subsysnqn:
+ kfree(subsys->subsysnqn);
free_fr:
kfree(subsys->firmware_rev);
free_mn:
@@ -1651,6 +1669,8 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
+ nvmet_debugfs_subsys_free(subsys);
+
xa_destroy(&subsys->namespaces);
nvmet_passthru_subsys_free(subsys);
@@ -1705,11 +1725,18 @@ static int __init nvmet_init(void)
if (error)
goto out_free_nvmet_work_queue;
- error = nvmet_init_configfs();
+ error = nvmet_init_debugfs();
if (error)
goto out_exit_discovery;
+
+ error = nvmet_init_configfs();
+ if (error)
+ goto out_exit_debugfs;
+
return 0;
+out_exit_debugfs:
+ nvmet_exit_debugfs();
out_exit_discovery:
nvmet_exit_discovery();
out_free_nvmet_work_queue:
@@ -1726,6 +1753,7 @@ out_destroy_bvec_cache:
static void __exit nvmet_exit(void)
{
nvmet_exit_configfs();
+ nvmet_exit_debugfs();
nvmet_exit_discovery();
ida_destroy(&cntlid_ida);
destroy_workqueue(nvmet_wq);
diff --git a/drivers/nvme/target/debugfs.c b/drivers/nvme/target/debugfs.c
new file mode 100644
index 000000000000..cb2befc8619e
--- /dev/null
+++ b/drivers/nvme/target/debugfs.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DebugFS interface for the NVMe target.
+ * Copyright (c) 2022-2024 Shadow
+ * Copyright (c) 2024 SUSE LLC
+ */
+
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include "nvmet.h"
+#include "debugfs.h"
+
+struct dentry *nvmet_debugfs;
+
+#define NVMET_DEBUGFS_ATTR(field) \
+ static int field##_open(struct inode *inode, struct file *file) \
+ { return single_open(file, field##_show, inode->i_private); } \
+ \
+ static const struct file_operations field##_fops = { \
+ .open = field##_open, \
+ .read = seq_read, \
+ .release = single_release, \
+ }
+
+#define NVMET_DEBUGFS_RW_ATTR(field) \
+ static int field##_open(struct inode *inode, struct file *file) \
+ { return single_open(file, field##_show, inode->i_private); } \
+ \
+ static const struct file_operations field##_fops = { \
+ .open = field##_open, \
+ .read = seq_read, \
+ .write = field##_write, \
+ .release = single_release, \
+ }
+
+static int nvmet_ctrl_hostnqn_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+
+ seq_puts(m, ctrl->hostnqn);
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_hostnqn);
+
+static int nvmet_ctrl_kato_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+
+ seq_printf(m, "%d\n", ctrl->kato);
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_kato);
+
+static int nvmet_ctrl_port_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+
+ seq_printf(m, "%d\n", le16_to_cpu(ctrl->port->disc_addr.portid));
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_port);
+
+static const char *const csts_state_names[] = {
+ [NVME_CSTS_RDY] = "ready",
+ [NVME_CSTS_CFS] = "fatal",
+ [NVME_CSTS_NSSRO] = "reset",
+ [NVME_CSTS_SHST_OCCUR] = "shutdown",
+ [NVME_CSTS_SHST_CMPLT] = "completed",
+ [NVME_CSTS_PP] = "paused",
+};
+
+static int nvmet_ctrl_state_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+ bool sep = false;
+ int i;
+
+ for (i = 0; i < 7; i++) {
+ int state = BIT(i);
+
+ if (!(ctrl->csts & state))
+ continue;
+ if (sep)
+ seq_puts(m, "|");
+ sep = true;
+ if (csts_state_names[state])
+ seq_puts(m, csts_state_names[state]);
+ else
+ seq_printf(m, "%d", state);
+ }
+ if (sep)
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static ssize_t nvmet_ctrl_state_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *m = file->private_data;
+ struct nvmet_ctrl *ctrl = m->private;
+ char reset[16];
+
+ if (count >= sizeof(reset))
+ return -EINVAL;
+ if (copy_from_user(reset, buf, count))
+ return -EFAULT;
+ if (!memcmp(reset, "fatal", 5))
+ nvmet_ctrl_fatal_error(ctrl);
+ else
+ return -EINVAL;
+ return count;
+}
+NVMET_DEBUGFS_RW_ATTR(nvmet_ctrl_state);
+
+static int nvmet_ctrl_host_traddr_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+ ssize_t size;
+ char buf[NVMF_TRADDR_SIZE + 1];
+
+ size = nvmet_ctrl_host_traddr(ctrl, buf, NVMF_TRADDR_SIZE);
+ if (size < 0) {
+ buf[0] = '\0';
+ size = 0;
+ }
+ buf[size] = '\0';
+ seq_printf(m, "%s\n", buf);
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_host_traddr);
+
+int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
+{
+ char name[32];
+ struct dentry *parent = ctrl->subsys->debugfs_dir;
+ int ret;
+
+ if (!parent)
+ return -ENODEV;
+ snprintf(name, sizeof(name), "ctrl%d", ctrl->cntlid);
+ ctrl->debugfs_dir = debugfs_create_dir(name, parent);
+ if (IS_ERR(ctrl->debugfs_dir)) {
+ ret = PTR_ERR(ctrl->debugfs_dir);
+ ctrl->debugfs_dir = NULL;
+ return ret;
+ }
+ debugfs_create_file("port", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_port_fops);
+ debugfs_create_file("hostnqn", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_hostnqn_fops);
+ debugfs_create_file("kato", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_kato_fops);
+ debugfs_create_file("state", S_IRUSR | S_IWUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_state_fops);
+ debugfs_create_file("host_traddr", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_host_traddr_fops);
+ return 0;
+}
+
+void nvmet_debugfs_ctrl_free(struct nvmet_ctrl *ctrl)
+{
+ debugfs_remove_recursive(ctrl->debugfs_dir);
+}
+
+int nvmet_debugfs_subsys_setup(struct nvmet_subsys *subsys)
+{
+ int ret = 0;
+
+ subsys->debugfs_dir = debugfs_create_dir(subsys->subsysnqn,
+ nvmet_debugfs);
+ if (IS_ERR(subsys->debugfs_dir)) {
+ ret = PTR_ERR(subsys->debugfs_dir);
+ subsys->debugfs_dir = NULL;
+ }
+ return ret;
+}
+
+void nvmet_debugfs_subsys_free(struct nvmet_subsys *subsys)
+{
+ debugfs_remove_recursive(subsys->debugfs_dir);
+}
+
+int __init nvmet_init_debugfs(void)
+{
+ struct dentry *parent;
+
+ parent = debugfs_create_dir("nvmet", NULL);
+ if (IS_ERR(parent)) {
+ pr_warn("%s: failed to create debugfs directory\n", "nvmet");
+ return PTR_ERR(parent);
+ }
+ nvmet_debugfs = parent;
+ return 0;
+}
+
+void nvmet_exit_debugfs(void)
+{
+ debugfs_remove_recursive(nvmet_debugfs);
+}
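
Putting the two setup hooks together, the resulting hierarchy for a subsystem with one connected controller looks roughly like this (the NQN is a made-up example; directories are named from subsys->subsysnqn and "ctrl%d" with the cntlid):

/sys/kernel/debug/nvmet/
└── nqn.2014-08.org.example:testsubsys/
    └── ctrl1/
        ├── host_traddr
        ├── hostnqn
        ├── kato
        ├── port
        └── state

All files are root-readable; state is additionally writable and accepts the string "fatal" to drive the controller into nvmet_ctrl_fatal_error().
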
diff --git a/drivers/nvme/target/debugfs.h b/drivers/nvme/target/debugfs.h
new file mode 100644
index 000000000000..cfb8bbf6a297
--- /dev/null
+++ b/drivers/nvme/target/debugfs.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DebugFS interface for the NVMe target.
+ * Copyright (c) 2022-2024 Shadow
+ * Copyright (c) 2024 SUSE LLC
+ */
+#ifndef NVMET_DEBUGFS_H
+#define NVMET_DEBUGFS_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_NVME_TARGET_DEBUGFS
+int nvmet_debugfs_subsys_setup(struct nvmet_subsys *subsys);
+void nvmet_debugfs_subsys_free(struct nvmet_subsys *subsys);
+int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl);
+void nvmet_debugfs_ctrl_free(struct nvmet_ctrl *ctrl);
+
+int __init nvmet_init_debugfs(void);
+void nvmet_exit_debugfs(void);
+#else
+static inline int nvmet_debugfs_subsys_setup(struct nvmet_subsys *subsys)
+{
+ return 0;
+}
+static inline void nvmet_debugfs_subsys_free(struct nvmet_subsys *subsys) {}
+
+static inline int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
+{
+ return 0;
+}
+static inline void nvmet_debugfs_ctrl_free(struct nvmet_ctrl *ctrl) {}
+
+static inline int __init nvmet_init_debugfs(void)
+{
+ return 0;
+}
+
+static inline void nvmet_exit_debugfs(void) {}
+
+#endif
+
+#endif /* NVMET_DEBUGFS_H */
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index ce54da8c6b36..28843df5fa7c 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -179,7 +179,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
req->error_loc =
offsetof(struct nvme_get_log_page_command, lid);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -187,7 +187,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
if (offset & 0x3) {
req->error_loc =
offsetof(struct nvme_get_log_page_command, lpo);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -256,7 +256,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
req->error_loc = offsetof(struct nvme_identify, cns);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -320,7 +320,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
- stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ stat = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -345,7 +345,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
- stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ stat = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -361,7 +361,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
cmd->common.opcode);
req->error_loc =
offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
switch (cmd->common.opcode) {
@@ -386,7 +386,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
default:
pr_debug("unhandled cmd %d\n", cmd->common.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
}
diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c
index cb34d644ed08..3f2857c17d95 100644
--- a/drivers/nvme/target/fabrics-cmd-auth.c
+++ b/drivers/nvme/target/fabrics-cmd-auth.c
@@ -189,26 +189,26 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
u8 dhchap_status;
if (req->cmd->auth_send.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, secp);
goto done;
}
if (req->cmd->auth_send.spsp0 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, spsp0);
goto done;
}
if (req->cmd->auth_send.spsp1 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, spsp1);
goto done;
}
tl = le32_to_cpu(req->cmd->auth_send.tl);
if (!tl) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, tl);
goto done;
@@ -437,26 +437,26 @@ void nvmet_execute_auth_receive(struct nvmet_req *req)
u16 status = 0;
if (req->cmd->auth_receive.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, secp);
goto done;
}
if (req->cmd->auth_receive.spsp0 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, spsp0);
goto done;
}
if (req->cmd->auth_receive.spsp1 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, spsp1);
goto done;
}
al = le32_to_cpu(req->cmd->auth_receive.al);
if (!al) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, al);
goto done;
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 69d77d34bec1..c4b2eddd5666 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -18,7 +18,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
if (req->cmd->prop_set.attrib & 1) {
req->error_loc =
offsetof(struct nvmf_property_set_command, attrib);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -29,7 +29,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvmf_property_set_command, offset);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
out:
nvmet_req_complete(req, status);
@@ -50,7 +50,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
val = ctrl->cap;
break;
default:
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
} else {
@@ -65,7 +65,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
val = ctrl->csts;
break;
default:
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
}
@@ -105,7 +105,7 @@ u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req)
pr_debug("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
return 0;
@@ -128,7 +128,7 @@ u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req)
pr_debug("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
return 0;
@@ -147,14 +147,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
- ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
goto err;
}
if (ctrl->sqs[qid] != NULL) {
pr_warn("qid %u has already been created\n", qid);
req->error_loc = offsetof(struct nvmf_connect_command, qid);
- return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
}
/* for fabrics, this value applies only to the I/O Submission Queues */
@@ -163,14 +163,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
sqsize, mqes, ctrl->cntlid);
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
- return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ return NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
}
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
pr_warn("queue already connected!\n");
req->error_loc = offsetof(struct nvmf_connect_command, opcode);
- return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+ return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
}
/* note: convert queue size from 0's-based value to 1's-based value */
@@ -230,14 +230,14 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
pr_warn("invalid connect version (%d).\n",
le16_to_cpu(c->recfmt));
req->error_loc = offsetof(struct nvmf_connect_command, recfmt);
- status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_FORMAT | NVME_STATUS_DNR;
goto out;
}
if (unlikely(d->cntlid != cpu_to_le16(0xffff))) {
pr_warn("connect attempt for invalid controller ID %#x\n",
d->cntlid);
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
goto out;
}
@@ -257,7 +257,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
dhchap_status);
nvmet_ctrl_put(ctrl);
if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
- status = (NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR);
+ status = (NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR);
else
status = NVME_SC_INTERNAL;
goto out;
@@ -305,7 +305,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
if (c->recfmt != 0) {
pr_warn("invalid connect version (%d).\n",
le16_to_cpu(c->recfmt));
- status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_FORMAT | NVME_STATUS_DNR;
goto out;
}
@@ -314,13 +314,13 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
le16_to_cpu(d->cntlid), req);
if (!ctrl) {
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
goto out;
}
if (unlikely(qid > ctrl->subsys->max_qid)) {
pr_warn("invalid queue id (%d)\n", qid);
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
goto out_ctrl_put;
}
@@ -350,13 +350,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
pr_debug("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
pr_debug("invalid capsule type 0x%x on unconnected queue.\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
if (cmd->connect.qid == 0)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 337ee1cb09ae..3ef4beacde32 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -148,7 +148,7 @@ struct nvmet_fc_tgt_queue {
struct workqueue_struct *work_q;
struct kref ref;
/* array of fcp_iods */
- struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize);
+ struct nvmet_fc_fcp_iod fod[] /* __counted_by(sqsize) */;
} __aligned(sizeof(unsigned long long));
struct nvmet_fc_hostport {
@@ -2934,6 +2934,38 @@ nvmet_fc_discovery_chg(struct nvmet_port *port)
tgtport->ops->discovery_event(&tgtport->fc_target_port);
}
+static ssize_t
+nvmet_fc_host_traddr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_size)
+{
+ struct nvmet_sq *sq = ctrl->sqs[0];
+ struct nvmet_fc_tgt_queue *queue =
+ container_of(sq, struct nvmet_fc_tgt_queue, nvme_sq);
+ struct nvmet_fc_tgtport *tgtport = queue->assoc ? queue->assoc->tgtport : NULL;
+ struct nvmet_fc_hostport *hostport = queue->assoc ? queue->assoc->hostport : NULL;
+ u64 wwnn, wwpn;
+ ssize_t ret = 0;
+
+ if (!tgtport || !nvmet_fc_tgtport_get(tgtport))
+ return -ENODEV;
+ if (!hostport || !nvmet_fc_hostport_get(hostport)) {
+ ret = -ENODEV;
+ goto out_put;
+ }
+
+ if (tgtport->ops->host_traddr) {
+ ret = tgtport->ops->host_traddr(hostport->hosthandle, &wwnn, &wwpn);
+ if (ret)
+ goto out_put_host;
+ ret = snprintf(traddr, traddr_size, "nn-0x%llx:pn-0x%llx", wwnn, wwpn);
+ }
+out_put_host:
+ nvmet_fc_hostport_put(hostport);
+out_put:
+ nvmet_fc_tgtport_put(tgtport);
+ return ret;
+}
+
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_FC,
@@ -2943,6 +2975,7 @@ static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.queue_response = nvmet_fc_fcp_nvme_cmd_done,
.delete_ctrl = nvmet_fc_delete_ctrl,
.discovery_chg = nvmet_fc_discovery_chg,
+ .host_traddr = nvmet_fc_host_traddr,
};
static int __init nvmet_fc_init_module(void)
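The new host_traddr callback lets the target core report the host's transport address for an established association. A hedged sketch of how a core-side helper can dispatch to it; the actual nvmet_ctrl_host_traddr() (declared in nvmet.h further down) lives in target/core.c and may differ in detail:

ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
		char *traddr, size_t traddr_len)
{
	if (!ctrl->ops->host_traddr)
		return -EOPNOTSUPP;	/* transport does not expose it */
	return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
}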
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 913cd2ec7a6f..e1abb27927ff 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -492,6 +492,16 @@ fcloop_t2h_host_release(void *hosthandle)
/* host handle ignored for now */
}
+static int
+fcloop_t2h_host_traddr(void *hosthandle, u64 *wwnn, u64 *wwpn)
+{
+ struct fcloop_rport *rport = hosthandle;
+
+ *wwnn = rport->lport->localport->node_name;
+ *wwpn = rport->lport->localport->port_name;
+ return 0;
+}
+
/*
* Simulate reception of an RSCN and convert it to an initiator transport
* call to rescan a remote port.
@@ -1074,6 +1084,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.ls_req = fcloop_t2h_ls_req,
.ls_abort = fcloop_t2h_ls_abort,
.host_release = fcloop_t2h_host_release,
+ .host_traddr = fcloop_t2h_host_traddr,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 6426aac2634a..0bda83d0fc3e 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -61,15 +61,17 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
- if (bi) {
+ if (!bi)
+ return;
+
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) {
ns->metadata_size = bi->tuple_size;
- if (bi->profile == &t10_pi_type1_crc)
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
ns->pi_type = NVME_NS_DPS_PI_TYPE1;
- else if (bi->profile == &t10_pi_type3_crc)
- ns->pi_type = NVME_NS_DPS_PI_TYPE3;
else
- /* Unsupported metadata type */
- ns->metadata_size = 0;
+ ns->pi_type = NVME_NS_DPS_PI_TYPE3;
+ } else {
+ ns->metadata_size = 0;
}
}
@@ -102,7 +104,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
ns->pi_type = 0;
ns->metadata_size = 0;
- if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
nvmet_bdev_ns_enable_integrity(ns);
if (bdev_is_zoned(ns->bdev)) {
@@ -135,11 +137,11 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
*/
switch (blk_sts) {
case BLK_STS_NOSPC:
- status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ status = NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_rw_command, length);
break;
case BLK_STS_TARGET:
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_rw_command, slba);
break;
case BLK_STS_NOTSUPP:
@@ -147,10 +149,10 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
switch (req->cmd->common.opcode) {
case nvme_cmd_dsm:
case nvme_cmd_write_zeroes:
- status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
break;
default:
- status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
break;
case BLK_STS_MEDIUM:
@@ -159,7 +161,7 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
break;
case BLK_STS_IOERR:
default:
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_common_command, opcode);
}
@@ -356,7 +358,7 @@ u16 nvmet_bdev_flush(struct nvmet_req *req)
return 0;
if (blkdev_issue_flush(req->ns->bdev))
- return NVME_SC_INTERNAL | NVME_SC_DNR;
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
return 0;
}
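With the block-layer integrity rework, the PI type is derived from the generic csum_type and ref-tag flag instead of comparing against specific t10_pi profile pointers. The mapping the new code implements, restated as a hypothetical helper:

static u8 pi_type_from_integrity(const struct blk_integrity *bi)
{
	if (bi->csum_type != BLK_INTEGRITY_CSUM_CRC)
		return 0;			/* unsupported: expose no PI */
	if (bi->flags & BLK_INTEGRITY_REF_TAG)
		return NVME_NS_DPS_PI_TYPE1;	/* CRC plus reference tag */
	return NVME_NS_DPS_PI_TYPE3;		/* CRC only */
}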
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e589915ddef8..e32790d8fc26 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -555,6 +555,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
goto out;
}
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
WARN_ON_ONCE(1);
@@ -611,6 +615,7 @@ out_free_queues:
kfree(ctrl->queues);
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
out:
if (ret > 0)
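The added out_put_ctrl label keeps the unwind symmetric: once nvme_add_ctrl() can fail after the controller is initialized, the error path must still drop the reference, after nvme_uninit_ctrl() has run. The general goto-unwind shape, with hypothetical step/undo helpers:

	ret = setup_a();		/* hypothetical */
	if (ret)
		goto out;
	ret = setup_b();
	if (ret)
		goto out_undo_a;	/* unwind in reverse order */
	return 0;

out_undo_a:
	undo_a();
out:
	return ret;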
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 2f22b07eab29..190f55e6d753 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -230,7 +230,9 @@ struct nvmet_ctrl {
struct device *p2p_client;
struct radix_tree_root p2p_ns_map;
-
+#ifdef CONFIG_NVME_TARGET_DEBUGFS
+ struct dentry *debugfs_dir;
+#endif
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
@@ -262,7 +264,9 @@ struct nvmet_subsys {
struct list_head hosts;
bool allow_any_host;
-
+#ifdef CONFIG_NVME_TARGET_DEBUGFS
+ struct dentry *debugfs_dir;
+#endif
u16 max_qid;
u64 ver;
@@ -350,6 +354,8 @@ struct nvmet_fabrics_ops {
void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
void (*disc_traddr)(struct nvmet_req *req,
struct nvmet_port *port, char *traddr);
+ ssize_t (*host_traddr)(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len);
u16 (*install_queue)(struct nvmet_sq *nvme_sq);
void (*discovery_chg)(struct nvmet_port *port);
u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
@@ -498,6 +504,8 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
struct nvmet_req *req);
void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
u16 nvmet_check_ctrl_status(struct nvmet_req *req);
+ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len);
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
enum nvme_subsys_type type);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index f003782d4ecf..24d0e2418d2e 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -306,7 +306,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
ns = nvme_find_get_ns(ctrl, nsid);
if (unlikely(!ns)) {
pr_err("failed to get passthru ns nsid:%u\n", nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
goto out;
}
@@ -426,7 +426,7 @@ u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
* emulated in the future if regular targets grow support for
* this feature.
*/
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
return nvmet_setup_passthru_command(req);
@@ -478,7 +478,7 @@ static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
case NVME_FEAT_RESV_PERSIST:
/* No reservations, see nvmet_parse_passthru_io_cmd() */
default:
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
}
@@ -546,7 +546,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
case NVME_ID_CNS_NS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
@@ -558,7 +558,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
default:
return nvmet_setup_passthru_command(req);
}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 689bb5d3cfdc..1eff8ca6a5f1 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -852,12 +852,12 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
if (!nvme_is_write(rsp->req.cmd)) {
rsp->req.error_loc =
offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
- return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR;
}
/* no data command? */
@@ -919,7 +919,7 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
pr_err("invalid SGL subtype: %#x\n", sgl->type);
rsp->req.error_loc =
offsetof(struct nvme_common_command, dptr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
case NVME_KEY_SGL_FMT_DATA_DESC:
switch (sgl->type & 0xf) {
@@ -931,12 +931,12 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
pr_err("invalid SGL subtype: %#x\n", sgl->type);
rsp->req.error_loc =
offsetof(struct nvme_common_command, dptr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
default:
pr_err("invalid SGL type: %#x\n", sgl->type);
rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_TYPE | NVME_STATUS_DNR;
}
}
@@ -2000,6 +2000,17 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
}
}
+static ssize_t nvmet_rdma_host_port_addr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ struct nvmet_sq *nvme_sq = ctrl->sqs[0];
+ struct nvmet_rdma_queue *queue =
+ container_of(nvme_sq, struct nvmet_rdma_queue, nvme_sq);
+
+ return snprintf(traddr, traddr_len, "%pISc",
+ (struct sockaddr *)&queue->cm_id->route.addr.dst_addr);
+}
+
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
if (ctrl->pi_support)
@@ -2024,6 +2035,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr,
+ .host_traddr = nvmet_rdma_host_port_addr,
.get_mdts = nvmet_rdma_get_mdts,
.get_max_queue_size = nvmet_rdma_get_max_queue_size,
};
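nvmet_rdma_host_port_addr() formats the peer address with %pISc, the kernel's printf extension for struct sockaddr ('IS' selects sockaddr printing, the trailing 'c' requests compressed IPv6 notation). A small usage sketch with hypothetical storage:

	struct sockaddr_storage peer;	/* filled in by the transport */
	char buf[64];

	snprintf(buf, sizeof(buf), "%pISc", (struct sockaddr *)&peer);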
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 380f22ee3ebb..5bff0d5464d1 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -416,10 +416,10 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
NVME_SGL_FMT_OFFSET)) {
if (!nvme_is_write(cmd->req.cmd))
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
if (len > cmd->req.port->inline_data_size)
- return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR;
cmd->pdu_len = len;
}
cmd->req.transfer_len += len;
@@ -2167,6 +2167,19 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
}
}
+static ssize_t nvmet_tcp_host_port_addr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ struct nvmet_sq *sq = ctrl->sqs[0];
+ struct nvmet_tcp_queue *queue =
+ container_of(sq, struct nvmet_tcp_queue, nvme_sq);
+
+ if (queue->sockaddr_peer.ss_family == AF_UNSPEC)
+ return -EINVAL;
+ return snprintf(traddr, traddr_len, "%pISc",
+ (struct sockaddr *)&queue->sockaddr_peer);
+}
+
static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_TCP,
@@ -2177,6 +2190,7 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.delete_ctrl = nvmet_tcp_delete_ctrl,
.install_queue = nvmet_tcp_install_queue,
.disc_traddr = nvmet_tcp_disc_port_addr,
+ .host_traddr = nvmet_tcp_host_port_addr,
};
static int __init nvmet_tcp_init(void)
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 0021d06041c1..af9e13be7678 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -100,7 +100,7 @@ void nvmet_execute_identify_ns_zns(struct nvmet_req *req)
if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
req->error_loc = offsetof(struct nvme_identify, nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
goto out;
}
@@ -121,7 +121,7 @@ void nvmet_execute_identify_ns_zns(struct nvmet_req *req)
}
if (!bdev_is_zoned(req->ns->bdev)) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_identify, nsid);
goto out;
}
@@ -158,17 +158,17 @@ static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba);
- return NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
}
if (out_bufsize < sizeof(struct nvme_zone_report)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
switch (req->cmd->zmr.pr) {
@@ -177,7 +177,7 @@ static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
break;
default:
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
switch (req->cmd->zmr.zrasf) {
@@ -193,7 +193,7 @@ static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
return NVME_SC_SUCCESS;
@@ -341,7 +341,7 @@ static u16 blkdev_zone_mgmt_errno_to_nvme_status(int ret)
return NVME_SC_SUCCESS;
case -EINVAL:
case -EIO:
- return NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
+ return NVME_SC_ZONE_INVALID_TRANSITION | NVME_STATUS_DNR;
default:
return NVME_SC_INTERNAL;
}
@@ -463,7 +463,7 @@ static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req)
default:
/* this is needed to quiet a compiler warning */
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
return NVME_SC_SUCCESS;
@@ -481,7 +481,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
if (op == REQ_OP_LAST) {
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
- status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
+ status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_STATUS_DNR;
goto out;
}
@@ -493,13 +493,13 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
if (sect >= get_capacity(bdev->bd_disk)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
goto out;
}
if (sect & (zone_sectors - 1)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -551,13 +551,13 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
req->error_loc = offsetof(struct nvme_rw_command, slba);
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
goto out;
}
if (sect & (bdev_zone_sectors(req->ns->bdev) - 1)) {
req->error_loc = offsetof(struct nvme_rw_command, slba);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -590,7 +590,7 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
}
if (total_len != nvmet_rw_data_len(req)) {
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
goto out_put_bio;
}
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index e1ec3b7200d7..f8dd7eb40fbe 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -396,10 +396,9 @@ static int nvmem_sysfs_setup_compat(struct nvmem_device *nvmem,
if (!config->base_dev)
return -EINVAL;
- if (config->type == NVMEM_TYPE_FRAM)
- bin_attr_nvmem_eeprom_compat.attr.name = "fram";
-
nvmem->eeprom = bin_attr_nvmem_eeprom_compat;
+ if (config->type == NVMEM_TYPE_FRAM)
+ nvmem->eeprom.attr.name = "fram";
nvmem->eeprom.attr.mode = nvmem_bin_attr_get_umode(nvmem);
nvmem->eeprom.size = nvmem->size;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -463,7 +462,7 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem)
"%s@%x,%x", entry->name,
entry->offset,
entry->bit_offset);
- attrs[i].attr.mode = 0444;
+ attrs[i].attr.mode = 0444 & nvmem_bin_attr_get_umode(nvmem);
attrs[i].size = entry->bytes;
attrs[i].read = &nvmem_cell_attr_read;
attrs[i].private = entry;
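Masking the cell mode with the device umode ensures cells are never more readable than the device that backs them. Worked example with two hypothetical devices:

	umode_t cell_mode;

	cell_mode = 0444 & 0400;	/* root-read-only device -> 0400 */
	cell_mode = 0444 & 0200;	/* write-only device -> 0000, cells hidden */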
diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c
index 33678d0af2c2..6c2f80e166e2 100644
--- a/drivers/nvmem/meson-efuse.c
+++ b/drivers/nvmem/meson-efuse.c
@@ -18,18 +18,24 @@ static int meson_efuse_read(void *context, unsigned int offset,
void *val, size_t bytes)
{
struct meson_sm_firmware *fw = context;
+ int ret;
- return meson_sm_call_read(fw, (u8 *)val, bytes, SM_EFUSE_READ, offset,
- bytes, 0, 0, 0);
+ ret = meson_sm_call_read(fw, (u8 *)val, bytes, SM_EFUSE_READ, offset,
+ bytes, 0, 0, 0);
+
+ return ret < 0 ? ret : 0;
}
static int meson_efuse_write(void *context, unsigned int offset,
void *val, size_t bytes)
{
struct meson_sm_firmware *fw = context;
+ int ret;
+
+ ret = meson_sm_call_write(fw, (u8 *)val, bytes, SM_EFUSE_WRITE, offset,
+ bytes, 0, 0, 0);
- return meson_sm_call_write(fw, (u8 *)val, bytes, SM_EFUSE_WRITE, offset,
- bytes, 0, 0, 0);
+ return ret < 0 ? ret : 0;
}
static const struct of_device_id meson_efuse_match[] = {
diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c
index 752d0bf4445e..7f907c5a445e 100644
--- a/drivers/nvmem/rmem.c
+++ b/drivers/nvmem/rmem.c
@@ -46,7 +46,10 @@ static int rmem_read(void *context, unsigned int offset,
memunmap(addr);
- return count;
+ if (count < 0)
+ return count;
+
+ return count == bytes ? 0 : -EIO;
}
static int rmem_probe(struct platform_device *pdev)
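Both nvmem fixes enforce the same callback contract: reg_read()/reg_write() return 0 on success and a negative errno on failure, never a byte count. The normalization pattern, with a hypothetical backend helper:

static int my_reg_read(void *ctx, unsigned int offset, void *val, size_t bytes)
{
	int ret = backend_read(ctx, offset, val, bytes);	/* hypothetical */

	if (ret < 0)
		return ret;		/* propagate errors as-is */
	return ret == bytes ? 0 : -EIO;	/* a short read is an I/O error */
}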
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 462375b293e4..c94203ce65bb 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -81,7 +81,8 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent);
/*
* These interrupt controllers abuse interrupt-map for unspeakable
* reasons and rely on the core code to *ignore* it (the drivers do
- * their own parsing of the property).
+ * their own parsing of the property). The PAsemi entry covers a
+ * nonsensical interrupt-map that is better left ignored.
*
* If you think of adding to the list for something *new*, think
* again. There is a high chance that you will be sent back to the
@@ -95,6 +96,7 @@ static const char * const of_irq_imap_abusers[] = {
"fsl,ls1043a-extirq",
"fsl,ls1088a-extirq",
"renesas,rza1-irqc",
+ "pasemi,rootbus",
NULL,
};
@@ -293,20 +295,8 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
imaplen -= imap - oldimap;
pr_debug(" -> imaplen=%d\n", imaplen);
}
- if (!match) {
- if (intc) {
- /*
- * The PASEMI Nemo is a known offender, so
- * let's only warn for anyone else.
- */
- WARN(!IS_ENABLED(CONFIG_PPC_PASEMI),
- "%pOF interrupt-map failed, using interrupt-controller\n",
- ipar);
- return 0;
- }
-
+ if (!match)
goto fail;
- }
/*
* Successfully parsed an interrupt-map translation; copy new
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index c5625dd9bf49..3a45879d85db 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -352,7 +352,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
struct irq_affinity *affd)
{
struct irq_affinity_desc *masks = NULL;
- struct msi_desc *entry;
+ struct msi_desc *entry, desc;
int ret;
/* Reject multi-MSI early on irq domain enabled architectures */
@@ -377,6 +377,12 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
/* All MSIs are unmasked by default; mask them all */
entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
pci_msi_mask(entry, msi_multi_mask(entry));
+ /*
+ * Copy the MSI descriptor for the error path because
+ * pci_msi_setup_msi_irqs() will free it for the hierarchical
+ * interrupt domain case.
+ */
+ memcpy(&desc, entry, sizeof(desc));
/* Configure MSI capability structure */
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
@@ -396,7 +402,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
goto unlock;
err:
- pci_msi_unmask(entry, msi_multi_mask(entry));
+ pci_msi_unmask(&desc, msi_multi_mask(&desc));
pci_free_msi_irqs(dev);
fail:
dev->msi_enabled = 0;
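The by-value copy guards against a use-after-free: in the hierarchical irqdomain case pci_msi_setup_msi_irqs() frees the descriptor it was handed, so the error path must work on a snapshot taken beforehand. The shape of the fix in isolation:

	struct msi_desc desc;

	memcpy(&desc, entry, sizeof(desc));	/* snapshot before setup */
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret)
		pci_msi_unmask(&desc, msi_multi_mask(&desc));	/* 'entry' may be freed */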
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index 78c490e0505a..0a02e85a8951 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
unsigned long cmask;
u64 oldval, delta;
- if (!rvpmu->ctr_read)
+ if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
return 0;
cmask = riscv_pmu_ctr_get_width_mask(event);
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index a2e4005e1fd0..4e842dcedfba 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -20,6 +20,7 @@
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
#include <linux/soc/andes/irq.h>
+#include <linux/workqueue.h>
#include <asm/errata_list.h>
#include <asm/sbi.h>
@@ -114,7 +115,7 @@ struct sbi_pmu_event_data {
};
};
-static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+static struct sbi_pmu_event_data pmu_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
@@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
};
#define C(x) PERF_COUNT_HW_CACHE_##x
-static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M
},
};
+static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ 0, cmask, 0, edata->event_idx, 0, 0);
+ if (!ret.error) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+ ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ } else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
+ /* This event cannot be monitored by any counter */
+ edata->event_idx = -EINVAL;
+ }
+}
+
+static void pmu_sbi_check_std_events(struct work_struct *work)
+{
+ for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+ pmu_sbi_check_event(&pmu_hw_event_map[i]);
+
+ for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
+ for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
+ for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+ pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
+}
+
+static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);
+
static int pmu_sbi_ctr_get_width(int idx)
{
return pmu_ctr_list[idx].width;
@@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
u64 raw_config_val;
int ret;
+ /*
+ * Ensure we are finished checking standard hardware events for
+ * validity before allowing userspace to configure any events.
+ */
+ flush_work(&check_std_events_work);
+
switch (type) {
case PERF_TYPE_HARDWARE:
if (config >= PERF_COUNT_HW_MAX)
@@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
* which may include counters that are not enabled yet.
*/
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
- 0, pmu->cmask, 0, 0, 0, 0);
+ 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
}
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
@@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ /* Asynchronously check which standard events are available */
+ schedule_work(&check_std_events_work);
+
return 0;
out_unregister:
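The event validation is pushed to a workqueue so probe does not stall on firmware calls, then flushed lazily before the first event is configured. The schedule/flush pairing in miniature:

#include <linux/workqueue.h>

static void check_events(struct work_struct *work)
{
	/* probe each standard event against the SBI firmware */
}
static DECLARE_WORK(check_work, check_events);

	/* at probe time */
	schedule_work(&check_work);

	/* before the first consumer needs the results */
	flush_work(&check_work);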
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 7f999e8a433d..7b00945f7191 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -187,6 +187,31 @@ static const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_TX_TRANSCEIVER_BIAS_EN,
};
+static const unsigned int qmp_v6_n4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
+ [QPHY_SW_RESET] = QPHY_V6_N4_PCS_SW_RESET,
+ [QPHY_START_CTRL] = QPHY_V6_N4_PCS_START_CONTROL,
+ [QPHY_PCS_STATUS] = QPHY_V6_N4_PCS_PCS_STATUS1,
+ [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V6_N4_PCS_POWER_DOWN_CONTROL,
+
+ /* In PCS_USB */
+ [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V6_PCS_USB3_AUTONOMOUS_MODE_CTRL,
+ [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V6_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR,
+
+ [QPHY_COM_RESETSM_CNTRL] = QSERDES_V6_COM_RESETSM_CNTRL,
+ [QPHY_COM_C_READY_STATUS] = QSERDES_V6_COM_C_READY_STATUS,
+ [QPHY_COM_CMN_STATUS] = QSERDES_V6_COM_CMN_STATUS,
+ [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN,
+
+ [QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV,
+
+ [QPHY_TX_TX_POL_INV] = QSERDES_V6_N4_TX_TX_POL_INV,
+ [QPHY_TX_TX_DRV_LVL] = QSERDES_V6_N4_TX_TX_DRV_LVL,
+ [QPHY_TX_TX_EMP_POST1_LVL] = QSERDES_V6_N4_TX_TX_EMP_POST1_LVL,
+ [QPHY_TX_HIGHZ_DRVR_EN] = QSERDES_V6_N4_TX_HIGHZ_DRVR_EN,
+ [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN,
+};
+
static const struct qmp_phy_init_tbl qmp_v3_usb3_serdes_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07),
QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14),
@@ -997,6 +1022,31 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x0f),
};
+static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SVS_MODE_CLK_SEL, 0x15),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x3b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYS_CLK_CTRL, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_ENABLE1, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_BUF_ENABLE, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_SELECT, 0x30),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xc0),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x12),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE0, 0x3f),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CORE_CLK_DIV_MODE0, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_CTRL, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, 0x17),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x0f),
+};
+
static const struct qmp_phy_init_tbl qmp_v6_dp_tx_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V6_TX_VMODE_CTRL1, 0x40),
QMP_PHY_INIT_CFG(QSERDES_V6_TX_PRE_STALL_LDO_BOOST_EN, 0x30),
@@ -1011,6 +1061,19 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_tx_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V6_TX_TX_BAND, 0x4),
};
+static const struct qmp_phy_init_tbl qmp_v6_n4_dp_tx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_VMODE_CTRL1, 0x40),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_PRE_STALL_LDO_BOOST_EN, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_INTERFACE_SELECT, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_CLKBUF_ENABLE, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RESET_TSYNC_EN, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_TRAN_DRVR_EMP_EN, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_PARRATE_REC_DETECT_IDLE_EN, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX, 0x11),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX, 0x11),
+ QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_TX_BAND, 0x1),
+};
+
static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl_rbr[] = {
QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x05),
QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34),
@@ -1059,6 +1122,74 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl_hbr3[] = {
QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c),
};
+static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_rbr[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x05),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x37),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01),
+};
+
+static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01),
+};
+
+static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr2[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x46),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x05),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x0e),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x97),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x10),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x18),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x02),
+};
+
+static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr3[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x17),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x15),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92),
+ QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01),
+};
+
static const struct qmp_phy_init_tbl sc8280xp_usb43dp_serdes_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x01),
QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31),
@@ -1273,20 +1404,20 @@ static const struct qmp_phy_init_tbl x1e80100_usb43dp_rx_tbl[] = {
};
static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_tbl[] = {
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG1, 0xc4),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG2, 0x89),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG3, 0x20),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG6, 0x13),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_REFGEN_REQ_CONFIG1, 0x21),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_RX_SIGDET_LVL, 0x55),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_CDR_RESET_TIME, 0x0a),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG1, 0xd4),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG2, 0x30),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_PCS_TX_RX_CONFIG, 0x0c),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG1, 0x4b),
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG5, 0x10),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG1, 0xc4),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG2, 0x89),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG3, 0x20),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG6, 0x13),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_REFGEN_REQ_CONFIG1, 0x21),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RX_SIGDET_LVL, 0x55),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RX_CONFIG, 0x0a),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG1, 0xd4),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG2, 0x30),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_PCS_TX_RX_CONFIG, 0x0c),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG1, 0x4b),
+ QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG5, 0x10),
};
static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_usb_tbl[] = {
@@ -1794,22 +1925,22 @@ static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = {
.pcs_usb_tbl = x1e80100_usb43dp_pcs_usb_tbl,
.pcs_usb_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_pcs_usb_tbl),
- .dp_serdes_tbl = qmp_v6_dp_serdes_tbl,
- .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl),
- .dp_tx_tbl = qmp_v6_dp_tx_tbl,
- .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_dp_tx_tbl),
+ .dp_serdes_tbl = qmp_v6_n4_dp_serdes_tbl,
+ .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl),
+ .dp_tx_tbl = qmp_v6_n4_dp_tx_tbl,
+ .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_n4_dp_tx_tbl),
- .serdes_tbl_rbr = qmp_v6_dp_serdes_tbl_rbr,
- .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_rbr),
- .serdes_tbl_hbr = qmp_v6_dp_serdes_tbl_hbr,
- .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr),
- .serdes_tbl_hbr2 = qmp_v6_dp_serdes_tbl_hbr2,
- .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr2),
- .serdes_tbl_hbr3 = qmp_v6_dp_serdes_tbl_hbr3,
- .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr3),
+ .serdes_tbl_rbr = qmp_v6_n4_dp_serdes_tbl_rbr,
+ .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_rbr),
+ .serdes_tbl_hbr = qmp_v6_n4_dp_serdes_tbl_hbr,
+ .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr),
+ .serdes_tbl_hbr2 = qmp_v6_n4_dp_serdes_tbl_hbr2,
+ .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr2),
+ .serdes_tbl_hbr3 = qmp_v6_n4_dp_serdes_tbl_hbr3,
+ .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr3),
- .swing_hbr_rbr = &qmp_dp_v5_voltage_swing_hbr_rbr,
- .pre_emphasis_hbr_rbr = &qmp_dp_v5_pre_emphasis_hbr_rbr,
+ .swing_hbr_rbr = &qmp_dp_v6_voltage_swing_hbr_rbr,
+ .pre_emphasis_hbr_rbr = &qmp_dp_v6_pre_emphasis_hbr_rbr,
.swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2,
.pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2,
@@ -1822,7 +1953,7 @@ static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = {
.num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l),
.vreg_list = qmp_phy_vreg_l,
.num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
- .regs = qmp_v45_usb3phy_regs_layout,
+ .regs = qmp_v6_n4_usb3phy_regs_layout,
};
static const struct qmp_phy_cfg sm6350_usb3dpphy_cfg = {
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h
new file mode 100644
index 000000000000..b3024714dab4
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023, Linaro Limited
+ */
+
+#ifndef QCOM_PHY_QMP_PCS_V6_N4_H_
+#define QCOM_PHY_QMP_PCS_V6_N4_H_
+
+/* Only for QMP V6 N4 PHY - USB/PCIe PCS registers */
+#define QPHY_V6_N4_PCS_SW_RESET 0x000
+#define QPHY_V6_N4_PCS_PCS_STATUS1 0x014
+#define QPHY_V6_N4_PCS_POWER_DOWN_CONTROL 0x040
+#define QPHY_V6_N4_PCS_START_CONTROL 0x044
+#define QPHY_V6_N4_PCS_POWER_STATE_CONFIG1 0x090
+#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG1 0x0c4
+#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG2 0x0c8
+#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG3 0x0cc
+#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG6 0x0d8
+#define QPHY_V6_N4_PCS_REFGEN_REQ_CONFIG1 0x0dc
+#define QPHY_V6_N4_PCS_RX_SIGDET_LVL 0x188
+#define QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_L 0x190
+#define QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_H 0x194
+#define QPHY_V6_N4_PCS_RATE_SLEW_CNTRL1 0x198
+#define QPHY_V6_N4_PCS_RX_CONFIG 0x1b0
+#define QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG1 0x1c0
+#define QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG2 0x1c4
+#define QPHY_V6_N4_PCS_PCS_TX_RX_CONFIG 0x1d0
+#define QPHY_V6_N4_PCS_EQ_CONFIG1 0x1dc
+#define QPHY_V6_N4_PCS_EQ_CONFIG2 0x1e0
+#define QPHY_V6_N4_PCS_EQ_CONFIG5 0x1ec
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h
index a814ad11af07..d37cc0d4fd36 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h
@@ -6,11 +6,24 @@
#ifndef QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_
#define QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_
+#define QSERDES_V6_N4_TX_CLKBUF_ENABLE 0x08
+#define QSERDES_V6_N4_TX_TX_EMP_POST1_LVL 0x0c
+#define QSERDES_V6_N4_TX_TX_DRV_LVL 0x14
+#define QSERDES_V6_N4_TX_RESET_TSYNC_EN 0x1c
+#define QSERDES_V6_N4_TX_PRE_STALL_LDO_BOOST_EN 0x20
#define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX 0x30
#define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX 0x34
+#define QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN 0x48
+#define QSERDES_V6_N4_TX_HIGHZ_DRVR_EN 0x4c
+#define QSERDES_V6_N4_TX_TX_POL_INV 0x50
+#define QSERDES_V6_N4_TX_PARRATE_REC_DETECT_IDLE_EN 0x54
#define QSERDES_V6_N4_TX_LANE_MODE_1 0x78
#define QSERDES_V6_N4_TX_LANE_MODE_2 0x7c
#define QSERDES_V6_N4_TX_LANE_MODE_3 0x80
+#define QSERDES_V6_N4_TX_TRAN_DRVR_EMP_EN 0xac
+#define QSERDES_V6_N4_TX_TX_BAND 0xd8
+#define QSERDES_V6_N4_TX_INTERFACE_SELECT 0xe4
+#define QSERDES_V6_N4_TX_VMODE_CTRL1 0xb0
#define QSERDES_V6_N4_RX_UCDR_FO_GAIN_RATE2 0x8
#define QSERDES_V6_N4_RX_UCDR_SO_GAIN_RATE2 0x18
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index d10b8f653c4b..d0f41e4aaa85 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -46,6 +46,8 @@
#include "phy-qcom-qmp-pcs-v6.h"
+#include "phy-qcom-qmp-pcs-v6-n4.h"
+
#include "phy-qcom-qmp-pcs-v6_20.h"
#include "phy-qcom-qmp-pcs-v7.h"
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index 7178a38475cc..27fd54795791 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -245,7 +245,7 @@ static const char * const irq_type_names[] = {
};
static bool persist_gpio_outputs;
-module_param(persist_gpio_outputs, bool, 0644);
+module_param(persist_gpio_outputs, bool, 0444);
MODULE_PARM_DESC(persist_gpio_outputs, "Enable GPIO_OUT persistence when pin is freed");
static inline u32 bcm2835_gpio_rd(struct bcm2835_pinctrl *pc, unsigned reg)
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index cffeb869130d..f424a57f0013 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1106,8 +1106,8 @@ static struct pinctrl *create_pinctrl(struct device *dev,
* an -EPROBE_DEFER later, as that is the worst case.
*/
if (ret == -EPROBE_DEFER) {
- pinctrl_free(p, false);
mutex_unlock(&pinctrl_maps_mutex);
+ pinctrl_free(p, false);
return ERR_PTR(ret);
}
}
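Swapping the two calls avoids a self-deadlock: pinctrl_free() takes pinctrl_maps_mutex itself, so it must run only after the mutex is dropped. The general rule, sketched with hypothetical names:

	mutex_lock(&maps_mutex);
	/* ... work that needs the lock ... */
	mutex_unlock(&maps_mutex);
	free_path();	/* re-acquires maps_mutex internally */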
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index 3bedf36a0019..3f56991f5b89 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -634,23 +634,68 @@ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = {
static struct rockchip_mux_recalced_data rk3328_mux_recalced_data[] = {
{
- .num = 2,
- .pin = 12,
- .reg = 0x24,
- .bit = 8,
- .mask = 0x3
- }, {
+ /* gpio2_b7_sel */
.num = 2,
.pin = 15,
.reg = 0x28,
.bit = 0,
.mask = 0x7
}, {
+ /* gpio2_c7_sel */
.num = 2,
.pin = 23,
.reg = 0x30,
.bit = 14,
.mask = 0x3
+ }, {
+ /* gpio3_b1_sel */
+ .num = 3,
+ .pin = 9,
+ .reg = 0x44,
+ .bit = 2,
+ .mask = 0x3
+ }, {
+ /* gpio3_b2_sel */
+ .num = 3,
+ .pin = 10,
+ .reg = 0x44,
+ .bit = 4,
+ .mask = 0x3
+ }, {
+ /* gpio3_b3_sel */
+ .num = 3,
+ .pin = 11,
+ .reg = 0x44,
+ .bit = 6,
+ .mask = 0x3
+ }, {
+ /* gpio3_b4_sel */
+ .num = 3,
+ .pin = 12,
+ .reg = 0x44,
+ .bit = 8,
+ .mask = 0x3
+ }, {
+ /* gpio3_b5_sel */
+ .num = 3,
+ .pin = 13,
+ .reg = 0x44,
+ .bit = 10,
+ .mask = 0x3
+ }, {
+ /* gpio3_b6_sel */
+ .num = 3,
+ .pin = 14,
+ .reg = 0x44,
+ .bit = 12,
+ .mask = 0x3
+ }, {
+ /* gpio3_b7_sel */
+ .num = 3,
+ .pin = 15,
+ .reg = 0x44,
+ .bit = 14,
+ .mask = 0x3
},
};
@@ -2433,6 +2478,7 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
case RK3188:
case RK3288:
case RK3308:
+ case RK3328:
case RK3368:
case RK3399:
case RK3568:
@@ -2491,6 +2537,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
case RK3188:
case RK3288:
case RK3308:
+ case RK3328:
case RK3368:
case RK3399:
case RK3568:
@@ -2704,8 +2751,10 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
if (ret) {
/* revert the already done pin settings */
- for (cnt--; cnt >= 0; cnt--)
+ for (cnt--; cnt >= 0; cnt--) {
+ bank = pin_to_bank(info, pins[cnt]);
rockchip_set_mux(bank, pins[cnt] - bank->pin_base, 0);
+ }
return ret;
}
@@ -2753,6 +2802,7 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl,
case RK3188:
case RK3288:
case RK3308:
+ case RK3328:
case RK3368:
case RK3399:
case RK3568:
@@ -3763,7 +3813,7 @@ static struct rockchip_pin_bank rk3328_pin_banks[] = {
PIN_BANK_IOMUX_FLAGS(0, 32, "gpio0", 0, 0, 0, 0),
PIN_BANK_IOMUX_FLAGS(1, 32, "gpio1", 0, 0, 0, 0),
PIN_BANK_IOMUX_FLAGS(2, 32, "gpio2", 0,
- IOMUX_WIDTH_3BIT,
+ 0,
IOMUX_WIDTH_3BIT,
0),
PIN_BANK_IOMUX_FLAGS(3, 32, "gpio3",
@@ -3777,7 +3827,7 @@ static struct rockchip_pin_ctrl rk3328_pin_ctrl = {
.pin_banks = rk3328_pin_banks,
.nr_banks = ARRAY_SIZE(rk3328_pin_banks),
.label = "RK3328-GPIO",
- .type = RK3288,
+ .type = RK3328,
.grf_mux_offset = 0x0,
.iomux_recalced = rk3328_mux_recalced_data,
.niomux_recalced = ARRAY_SIZE(rk3328_mux_recalced_data),
diff --git a/drivers/pinctrl/pinctrl-rockchip.h b/drivers/pinctrl/pinctrl-rockchip.h
index 4759f336941e..849266f8b191 100644
--- a/drivers/pinctrl/pinctrl-rockchip.h
+++ b/drivers/pinctrl/pinctrl-rockchip.h
@@ -193,6 +193,7 @@ enum rockchip_pinctrl_type {
RK3188,
RK3288,
RK3308,
+ RK3328,
RK3368,
RK3399,
RK3568,
diff --git a/drivers/pinctrl/pinctrl-tps6594.c b/drivers/pinctrl/pinctrl-tps6594.c
index 085047320853..5e7c7cf93445 100644
--- a/drivers/pinctrl/pinctrl-tps6594.c
+++ b/drivers/pinctrl/pinctrl-tps6594.c
@@ -486,6 +486,7 @@ static int tps6594_pinctrl_probe(struct platform_device *pdev)
break;
case TPS6593:
case TPS6594:
+ case LP8764:
pctrl_desc->pins = tps6594_pins;
pctrl_desc->npins = ARRAY_SIZE(tps6594_pins);
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index 4e80c7204e5f..4abd6f18bbef 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -1207,7 +1207,6 @@ static const struct of_device_id pmic_gpio_of_match[] = {
{ .compatible = "qcom,pm7325-gpio", .data = (void *) 10 },
{ .compatible = "qcom,pm7550ba-gpio", .data = (void *) 8},
{ .compatible = "qcom,pm8005-gpio", .data = (void *) 4 },
- { .compatible = "qcom,pm8008-gpio", .data = (void *) 2 },
{ .compatible = "qcom,pm8019-gpio", .data = (void *) 6 },
/* pm8150 has 10 GPIOs with holes on 2, 5, 7 and 8 */
{ .compatible = "qcom,pm8150-gpio", .data = (void *) 10 },
diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
index c3256bfde502..60be78da9f52 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
@@ -2071,11 +2071,11 @@ static void rzg2l_gpio_irq_restore(struct rzg2l_pinctrl *pctrl)
* This has to be atomically executed to protect against a concurrent
* interrupt.
*/
- raw_spin_lock_irqsave(&pctrl->lock.rlock, flags);
+ spin_lock_irqsave(&pctrl->lock, flags);
ret = rzg2l_gpio_irq_set_type(data, irqd_get_trigger_type(data));
if (!ret && !irqd_irq_disabled(data))
rzg2l_gpio_irq_enable(data);
- raw_spin_unlock_irqrestore(&pctrl->lock.rlock, flags);
+ spin_unlock_irqrestore(&pctrl->lock, flags);
if (ret)
dev_crit(pctrl->dev, "Failed to set IRQ type for virq=%u\n", virq);
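Going through the spinlock API instead of poking the embedded ->rlock keeps lockdep annotations and PREEMPT_RT semantics intact: a spinlock_t is locked as a unit, never via its raw member. The canonical pattern:

	spinlock_t lock;
	unsigned long flags;

	spin_lock_irqsave(&lock, flags);
	/* ... section that must exclude the interrupt handler ... */
	spin_unlock_irqrestore(&lock, flags);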
diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c
index 3ef655591424..abe7be602f84 100644
--- a/drivers/platform/mellanox/nvsw-sn2201.c
+++ b/drivers/platform/mellanox/nvsw-sn2201.c
@@ -1198,6 +1198,7 @@ static int nvsw_sn2201_config_pre_init(struct nvsw_sn2201 *nvsw_sn2201)
static int nvsw_sn2201_probe(struct platform_device *pdev)
{
struct nvsw_sn2201 *nvsw_sn2201;
+ int ret;
nvsw_sn2201 = devm_kzalloc(&pdev->dev, sizeof(*nvsw_sn2201), GFP_KERNEL);
if (!nvsw_sn2201)
@@ -1205,8 +1206,10 @@ static int nvsw_sn2201_probe(struct platform_device *pdev)
nvsw_sn2201->dev = &pdev->dev;
platform_set_drvdata(pdev, nvsw_sn2201);
- platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources,
+ ret = platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources,
ARRAY_SIZE(nvsw_sn2201_lpc_io_resources));
+ if (ret)
+ return ret;
nvsw_sn2201->main_mux_deferred_nr = NVSW_SN2201_MAIN_MUX_DEFER_NR;
nvsw_sn2201->main_mux_devs = nvsw_sn2201_main_mux_brdinfo;
diff --git a/drivers/platform/x86/amilo-rfkill.c b/drivers/platform/x86/amilo-rfkill.c
index efcf909786a5..2423dc91debb 100644
--- a/drivers/platform/x86/amilo-rfkill.c
+++ b/drivers/platform/x86/amilo-rfkill.c
@@ -171,6 +171,7 @@ static void __exit amilo_rfkill_exit(void)
}
MODULE_AUTHOR("Ben Hutchings <[email protected]>");
+MODULE_DESCRIPTION("Fujitsu-Siemens Amilo rfkill support");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(dmi, amilo_rfkill_id_table);
diff --git a/drivers/platform/x86/firmware_attributes_class.c b/drivers/platform/x86/firmware_attributes_class.c
index dd8240009565..182a07d8ae3d 100644
--- a/drivers/platform/x86/firmware_attributes_class.c
+++ b/drivers/platform/x86/firmware_attributes_class.c
@@ -49,4 +49,5 @@ int fw_attributes_class_put(void)
EXPORT_SYMBOL_GPL(fw_attributes_class_put);
MODULE_AUTHOR("Mark Pearson <[email protected]>");
+MODULE_DESCRIPTION("Firmware attributes class helper module");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index 1d4bbae115f1..231b37909801 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -29,6 +29,7 @@ static bool debug;
module_param(debug, bool, 0644);
MODULE_PARM_DESC(debug, "Show debug output");
+MODULE_DESCRIPTION("IBM Premium Real Time Mode (PRTM) driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Keith Mannthey <[email protected]>");
MODULE_AUTHOR("Vernon Mauery <[email protected]>");
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index c7a827645864..10cd65497cc1 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -38,6 +38,7 @@ MODULE_PARM_DESC(enable_sw_tablet_mode,
/* When NOT in tablet mode, VGBS returns with the flag 0x40 */
#define TABLET_MODE_FLAG BIT(6)
+MODULE_DESCRIPTION("Intel HID Event hotkey driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alex Hung");
diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c
index ddfba38c2104..f2cb87dc2d37 100644
--- a/drivers/platform/x86/intel/pmc/pltdrv.c
+++ b/drivers/platform/x86/intel/pmc/pltdrv.c
@@ -86,4 +86,5 @@ static void __exit pmc_core_platform_exit(void)
module_init(pmc_core_platform_init);
module_exit(pmc_core_platform_exit);
+MODULE_DESCRIPTION("Intel PMC Core platform driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel/rst.c b/drivers/platform/x86/intel/rst.c
index 6bc9c4a603e0..f3a60e14d4c1 100644
--- a/drivers/platform/x86/intel/rst.c
+++ b/drivers/platform/x86/intel/rst.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/slab.h>
+MODULE_DESCRIPTION("Intel Rapid Start Technology Driver");
MODULE_LICENSE("GPL");
static ssize_t irst_show_wakeup_events(struct device *dev,
diff --git a/drivers/platform/x86/intel/smartconnect.c b/drivers/platform/x86/intel/smartconnect.c
index cd25d0585324..31019a1a6d5e 100644
--- a/drivers/platform/x86/intel/smartconnect.c
+++ b/drivers/platform/x86/intel/smartconnect.c
@@ -6,6 +6,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
+MODULE_DESCRIPTION("Intel Smart Connect disabling driver");
MODULE_LICENSE("GPL");
static int smartconnect_acpi_init(struct acpi_device *acpi)
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 84c1353eb12b..9b7ce03ba085 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -24,6 +24,7 @@
#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT)
+MODULE_DESCRIPTION("Intel Virtual Button driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("AceLan Kao");
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index d0fee5d375d7..9c7857842caf 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -39,8 +39,6 @@ MODULE_LICENSE("GPL");
#define WMI_METHOD_WMBB "2B4F501A-BD3C-4394-8DCF-00A7D2BC8210"
#define WMI_EVENT_GUID WMI_EVENT_GUID0
-#define WMAB_METHOD "\\XINI.WMAB"
-#define WMBB_METHOD "\\XINI.WMBB"
#define SB_GGOV_METHOD "\\_SB.GGOV"
#define GOV_TLED 0x2020008
#define WM_GET 1
@@ -74,7 +72,7 @@ static u32 inited;
static int battery_limit_use_wmbb;
static struct led_classdev kbd_backlight;
-static enum led_brightness get_kbd_backlight_level(void);
+static enum led_brightness get_kbd_backlight_level(struct device *dev);
static const struct key_entry wmi_keymap[] = {
{KE_KEY, 0x70, {KEY_F15} }, /* LG control panel (F1) */
@@ -84,7 +82,6 @@ static const struct key_entry wmi_keymap[] = {
* this key both sends an event and
* changes backlight level.
*/
- {KE_KEY, 0x80, {KEY_RFKILL} },
{KE_END, 0}
};
@@ -128,11 +125,10 @@ static int ggov(u32 arg0)
return res;
}
-static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2)
+static union acpi_object *lg_wmab(struct device *dev, u32 method, u32 arg1, u32 arg2)
{
union acpi_object args[3];
acpi_status status;
- acpi_handle handle;
struct acpi_object_list arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -143,29 +139,22 @@ static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2)
args[2].type = ACPI_TYPE_INTEGER;
args[2].integer.value = arg2;
- status = acpi_get_handle(NULL, (acpi_string) WMAB_METHOD, &handle);
- if (ACPI_FAILURE(status)) {
- pr_err("Cannot get handle");
- return NULL;
- }
-
arg.count = 3;
arg.pointer = args;
- status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+ status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMAB", &arg, &buffer);
if (ACPI_FAILURE(status)) {
- acpi_handle_err(handle, "WMAB: call failed.\n");
+ dev_err(dev, "WMAB: call failed.\n");
return NULL;
}
return buffer.pointer;
}
-static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2)
+static union acpi_object *lg_wmbb(struct device *dev, u32 method_id, u32 arg1, u32 arg2)
{
union acpi_object args[3];
acpi_status status;
- acpi_handle handle;
struct acpi_object_list arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
u8 buf[32];
@@ -181,18 +170,12 @@ static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2)
args[2].buffer.length = 32;
args[2].buffer.pointer = buf;
- status = acpi_get_handle(NULL, (acpi_string)WMBB_METHOD, &handle);
- if (ACPI_FAILURE(status)) {
- pr_err("Cannot get handle");
- return NULL;
- }
-
arg.count = 3;
arg.pointer = args;
- status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+ status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMBB", &arg, &buffer);
if (ACPI_FAILURE(status)) {
- acpi_handle_err(handle, "WMAB: call failed.\n");
+ dev_err(dev, "WMBB: call failed.\n");
return NULL;
}
@@ -223,7 +206,7 @@ static void wmi_notify(u32 value, void *context)
if (eventcode == 0x10000000) {
led_classdev_notify_brightness_hw_changed(
- &kbd_backlight, get_kbd_backlight_level());
+ &kbd_backlight, get_kbd_backlight_level(kbd_backlight.dev->parent));
} else {
key = sparse_keymap_entry_from_scancode(
wmi_input_dev, eventcode);
@@ -272,14 +255,7 @@ static void wmi_input_setup(void)
static void acpi_notify(struct acpi_device *device, u32 event)
{
- struct key_entry *key;
-
acpi_handle_debug(device->handle, "notify: %d\n", event);
- if (inited & INIT_SPARSE_KEYMAP) {
- key = sparse_keymap_entry_from_scancode(wmi_input_dev, 0x80);
- if (key && key->type == KE_KEY)
- sparse_keymap_report_entry(wmi_input_dev, key, 1, true);
- }
}
static ssize_t fan_mode_store(struct device *dev,
@@ -295,7 +271,7 @@ static ssize_t fan_mode_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+ r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0);
if (!r)
return -EIO;
@@ -306,9 +282,9 @@ static ssize_t fan_mode_store(struct device *dev,
m = r->integer.value;
kfree(r);
- r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4));
+ r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4));
kfree(r);
- r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value);
+ r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value);
kfree(r);
return count;
@@ -320,7 +296,7 @@ static ssize_t fan_mode_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+ r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0);
if (!r)
return -EIO;
@@ -347,7 +323,7 @@ static ssize_t usb_charge_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmbb(WMBB_USB_CHARGE, WM_SET, value);
+ r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_SET, value);
if (!r)
return -EIO;
@@ -361,7 +337,7 @@ static ssize_t usb_charge_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmbb(WMBB_USB_CHARGE, WM_GET, 0);
+ r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_GET, 0);
if (!r)
return -EIO;
@@ -389,7 +365,7 @@ static ssize_t reader_mode_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmab(WM_READER_MODE, WM_SET, value);
+ r = lg_wmab(dev, WM_READER_MODE, WM_SET, value);
if (!r)
return -EIO;
@@ -403,7 +379,7 @@ static ssize_t reader_mode_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmab(WM_READER_MODE, WM_GET, 0);
+ r = lg_wmab(dev, WM_READER_MODE, WM_GET, 0);
if (!r)
return -EIO;
@@ -431,7 +407,7 @@ static ssize_t fn_lock_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmab(WM_FN_LOCK, WM_SET, value);
+ r = lg_wmab(dev, WM_FN_LOCK, WM_SET, value);
if (!r)
return -EIO;
@@ -445,7 +421,7 @@ static ssize_t fn_lock_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmab(WM_FN_LOCK, WM_GET, 0);
+ r = lg_wmab(dev, WM_FN_LOCK, WM_GET, 0);
if (!r)
return -EIO;
@@ -475,9 +451,9 @@ static ssize_t charge_control_end_threshold_store(struct device *dev,
union acpi_object *r;
if (battery_limit_use_wmbb)
- r = lg_wmbb(WMBB_BATT_LIMIT, WM_SET, value);
+ r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_SET, value);
else
- r = lg_wmab(WM_BATT_LIMIT, WM_SET, value);
+ r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_SET, value);
if (!r)
return -EIO;
@@ -496,7 +472,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device,
union acpi_object *r;
if (battery_limit_use_wmbb) {
- r = lg_wmbb(WMBB_BATT_LIMIT, WM_GET, 0);
+ r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_GET, 0);
if (!r)
return -EIO;
@@ -507,7 +483,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device,
status = r->buffer.pointer[0x10];
} else {
- r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0);
+ r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_GET, 0);
if (!r)
return -EIO;
@@ -586,7 +562,7 @@ static void tpad_led_set(struct led_classdev *cdev,
{
union acpi_object *r;
- r = lg_wmab(WM_TLED, WM_SET, brightness > LED_OFF);
+ r = lg_wmab(cdev->dev->parent, WM_TLED, WM_SET, brightness > LED_OFF);
kfree(r);
}
@@ -608,16 +584,16 @@ static void kbd_backlight_set(struct led_classdev *cdev,
val = 0;
if (brightness >= LED_FULL)
val = 0x24;
- r = lg_wmab(WM_KEY_LIGHT, WM_SET, val);
+ r = lg_wmab(cdev->dev->parent, WM_KEY_LIGHT, WM_SET, val);
kfree(r);
}
-static enum led_brightness get_kbd_backlight_level(void)
+static enum led_brightness get_kbd_backlight_level(struct device *dev)
{
union acpi_object *r;
int val;
- r = lg_wmab(WM_KEY_LIGHT, WM_GET, 0);
+ r = lg_wmab(dev, WM_KEY_LIGHT, WM_GET, 0);
if (!r)
return LED_OFF;
@@ -645,7 +621,7 @@ static enum led_brightness get_kbd_backlight_level(void)
static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
{
- return get_kbd_backlight_level();
+ return get_kbd_backlight_level(cdev->dev->parent);
}
static LED_DEVICE(kbd_backlight, 255, LED_BRIGHT_HW_CHANGED);
@@ -672,6 +648,11 @@ static struct platform_driver pf_driver = {
static int acpi_add(struct acpi_device *device)
{
+ struct platform_device_info pdev_info = {
+ .fwnode = acpi_fwnode_handle(device),
+ .name = PLATFORM_NAME,
+ .id = PLATFORM_DEVID_NONE,
+ };
int ret;
const char *product;
int year = 2017;
@@ -683,9 +664,7 @@ static int acpi_add(struct acpi_device *device)
if (ret)
return ret;
- pf_device = platform_device_register_simple(PLATFORM_NAME,
- PLATFORM_DEVID_NONE,
- NULL, 0);
+ pf_device = platform_device_register_full(&pdev_info);
if (IS_ERR(pf_device)) {
ret = PTR_ERR(pf_device);
pf_device = NULL;
@@ -776,7 +755,7 @@ static void acpi_remove(struct acpi_device *device)
}
static const struct acpi_device_id device_ids[] = {
- {"LGEX0815", 0},
+ {"LGEX0820", 0},
{"", 0}
};
MODULE_DEVICE_TABLE(acpi, device_ids);
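
The lg-laptop change above replaces absolute ACPI paths (\XINI.WMAB, \XINI.WMBB) with evaluation relative to the owning device's handle. A minimal sketch of that pattern, assuming a hypothetical evaluate_wmab() helper on a device whose ACPI scope contains a WMAB method:

    #include <linux/acpi.h>
    #include <linux/device.h>

    /* Hypothetical helper: evaluate the device-relative "WMAB" control
     * method. Passing ACPI_HANDLE(dev) plus a relative pathname lets ACPICA
     * resolve the method in the device's own scope, so the driver no longer
     * depends on the firmware using one specific absolute path.
     */
    static union acpi_object *evaluate_wmab(struct device *dev, u64 arg)
    {
            struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL };
            union acpi_object in = {
                    .integer = { .type = ACPI_TYPE_INTEGER, .value = arg },
            };
            struct acpi_object_list args = { .count = 1, .pointer = &in };

            if (ACPI_FAILURE(acpi_evaluate_object(ACPI_HANDLE(dev), "WMAB",
                                                  &args, &out)))
                    return NULL;            /* callers treat NULL as -EIO */

            return out.pointer;             /* caller must kfree() this */
    }
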
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c
index 31a139d87d9a..5edc294de6e4 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c
@@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Apollo Lake GPIO");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_SOFTDEP("pre: simatic-ipc-batt platform:apollolake-pinctrl");
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c
index a7676f224075..e6a56d14b505 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c
@@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Elkhart Lake GPIO");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_SOFTDEP("pre: simatic-ipc-batt platform:elkhartlake-pinctrl");
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c
index 5e77e05fdb5d..f8849d0e48a8 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c
@@ -81,6 +81,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Nuvoton GPIO");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_SOFTDEP("pre: simatic-ipc-batt gpio_f7188x platform:elkhartlake-pinctrl platform:alderlake-pinctrl");
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt.c b/drivers/platform/x86/siemens/simatic-ipc-batt.c
index c6dd263b4ee3..d9aff10608cf 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt.c
@@ -247,6 +247,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS core battery driver for Siemens Simatic IPCs");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_AUTHOR("Henning Schild <[email protected]>");
diff --git a/drivers/platform/x86/siemens/simatic-ipc.c b/drivers/platform/x86/siemens/simatic-ipc.c
index 8ca6e277fa03..7039874d8f11 100644
--- a/drivers/platform/x86/siemens/simatic-ipc.c
+++ b/drivers/platform/x86/siemens/simatic-ipc.c
@@ -231,6 +231,7 @@ static void __exit simatic_ipc_exit_module(void)
module_init(simatic_ipc_init_module);
module_exit(simatic_ipc_exit_module);
+MODULE_DESCRIPTION("Siemens SIMATIC IPC platform driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Gerd Haeussler <[email protected]>");
MODULE_ALIAS("dmi:*:svnSIEMENSAG:*");
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 3a8d8df89186..78a5aac2dcfd 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -3271,7 +3271,7 @@ static const char *find_hci_method(acpi_handle handle)
*/
#define QUIRK_HCI_HOTKEY_QUICKSTART BIT(1)
-static const struct dmi_system_id toshiba_dmi_quirks[] = {
+static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = {
{
/* Toshiba Portégé R700 */
/* https://bugzilla.kernel.org/show_bug.cgi?id=21012 */
@@ -3299,6 +3299,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] = {
},
.driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART),
},
+ { }
};
static int toshiba_acpi_add(struct acpi_device *acpi_dev)
@@ -3306,8 +3307,6 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
struct toshiba_acpi_dev *dev;
const char *hci_method;
u32 dummy;
- const struct dmi_system_id *dmi_id;
- long quirks = 0;
int ret = 0;
if (toshiba_acpi)
@@ -3460,16 +3459,6 @@ iio_error:
}
#endif
- dmi_id = dmi_first_match(toshiba_dmi_quirks);
- if (dmi_id)
- quirks = (long)dmi_id->driver_data;
-
- if (turn_on_panel_on_resume == -1)
- turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME);
-
- if (hci_hotkey_quickstart == -1)
- hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART);
-
toshiba_wwan_available(dev);
if (dev->wwan_supported)
toshiba_acpi_setup_wwan_rfkill(dev);
@@ -3618,10 +3607,27 @@ static struct acpi_driver toshiba_acpi_driver = {
.drv.pm = &toshiba_acpi_pm,
};
+static void __init toshiba_dmi_init(void)
+{
+ const struct dmi_system_id *dmi_id;
+ long quirks = 0;
+
+ dmi_id = dmi_first_match(toshiba_dmi_quirks);
+ if (dmi_id)
+ quirks = (long)dmi_id->driver_data;
+
+ if (turn_on_panel_on_resume == -1)
+ turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME);
+
+ if (hci_hotkey_quickstart == -1)
+ hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART);
+}
+
static int __init toshiba_acpi_init(void)
{
int ret;
+ toshiba_dmi_init();
toshiba_proc_dir = proc_mkdir(PROC_TOSHIBA, acpi_root_dir);
if (!toshiba_proc_dir) {
pr_err("Unable to create proc dir " PROC_TOSHIBA "\n");
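
Marking toshiba_dmi_quirks[] __initconst is only valid because the new toshiba_dmi_init() helper is its sole consumer and is itself __init, so both table and code are discarded after boot. A hedged sketch of the same pattern, with a made-up vendor match and quirk bit:

    #include <linux/bits.h>
    #include <linux/dmi.h>
    #include <linux/init.h>

    #define QUIRK_EXAMPLE   BIT(0)          /* hypothetical quirk bit */

    static const struct dmi_system_id example_quirks[] __initconst = {
            {
                    .matches = {
                            DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
                    },
                    .driver_data = (void *)QUIRK_EXAMPLE,
            },
            { }     /* terminator, as also added to toshiba_dmi_quirks[] above */
    };

    static long example_quirk_flags;        /* survives after init */

    static int __init example_init(void)
    {
            const struct dmi_system_id *id = dmi_first_match(example_quirks);

            if (id)
                    example_quirk_flags = (long)id->driver_data;

            return 0;
    }
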
diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c
index 37372d7cc54a..f6a0627f36db 100644
--- a/drivers/platform/x86/uv_sysfs.c
+++ b/drivers/platform/x86/uv_sysfs.c
@@ -929,4 +929,5 @@ module_init(uv_sysfs_init);
module_exit(uv_sysfs_exit);
MODULE_AUTHOR("Hewlett Packard Enterprise");
+MODULE_DESCRIPTION("Sysfs structure for HPE UV systems");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c
index e95cdbbfb708..a220fe4f9ef8 100644
--- a/drivers/platform/x86/wireless-hotkey.c
+++ b/drivers/platform/x86/wireless-hotkey.c
@@ -14,11 +14,13 @@
#include <linux/acpi.h>
#include <acpi/acpi_bus.h>
+MODULE_DESCRIPTION("Airplane mode button for AMD, HP & Xiaomi laptops");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alex Hung");
MODULE_ALIAS("acpi*:HPQ6001:*");
MODULE_ALIAS("acpi*:WSTADEF:*");
MODULE_ALIAS("acpi*:AMDI0051:*");
+MODULE_ALIAS("acpi*:LGEX0815:*");
struct wl_button {
struct input_dev *input_dev;
@@ -29,6 +31,7 @@ static const struct acpi_device_id wl_ids[] = {
{"HPQ6001", 0},
{"WSTADEF", 0},
{"AMDI0051", 0},
+ {"LGEX0815", 0},
{"", 0},
};
diff --git a/drivers/platform/x86/xo1-rfkill.c b/drivers/platform/x86/xo1-rfkill.c
index e64d5646b4c7..5fe68296501c 100644
--- a/drivers/platform/x86/xo1-rfkill.c
+++ b/drivers/platform/x86/xo1-rfkill.c
@@ -74,5 +74,6 @@ static struct platform_driver xo1_rfkill_driver = {
module_platform_driver(xo1_rfkill_driver);
MODULE_AUTHOR("Daniel Drake <[email protected]>");
+MODULE_DESCRIPTION("OLPC XO-1 software RF kill switch");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:xo1-rfkill");
diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c
index de9121ef4216..d2cb4271a1ca 100644
--- a/drivers/pmdomain/qcom/rpmhpd.c
+++ b/drivers/pmdomain/qcom/rpmhpd.c
@@ -40,6 +40,7 @@
* @addr: Resource address as looked up using resource name from
* cmd-db
* @state_synced: Indicator that sync_state has been invoked for the rpmhpd resource
+ * @skip_retention_level: Indicates that the retention level should not be used for the power domain
*/
struct rpmhpd {
struct device *dev;
@@ -56,6 +57,7 @@ struct rpmhpd {
const char *res_name;
u32 addr;
bool state_synced;
+ bool skip_retention_level;
};
struct rpmhpd_desc {
@@ -173,6 +175,7 @@ static struct rpmhpd mxc = {
.pd = { .name = "mxc", },
.peer = &mxc_ao,
.res_name = "mxc.lvl",
+ .skip_retention_level = true,
};
static struct rpmhpd mxc_ao = {
@@ -180,6 +183,7 @@ static struct rpmhpd mxc_ao = {
.active_only = true,
.peer = &mxc,
.res_name = "mxc.lvl",
+ .skip_retention_level = true,
};
static struct rpmhpd nsp = {
@@ -819,6 +823,9 @@ static int rpmhpd_update_level_mapping(struct rpmhpd *rpmhpd)
return -EINVAL;
for (i = 0; i < rpmhpd->level_count; i++) {
+ if (rpmhpd->skip_retention_level && buf[i] == RPMH_REGULATOR_LEVEL_RETENTION)
+ continue;
+
rpmhpd->level[i] = buf[i];
/* Remember the first corner with non-zero level */
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
index a15460aaa03b..6b1b8f57cd95 100644
--- a/drivers/ptp/ptp_sysfs.c
+++ b/drivers/ptp/ptp_sysfs.c
@@ -296,8 +296,7 @@ static ssize_t max_vclocks_store(struct device *dev,
if (max < ptp->n_vclocks)
goto out;
- size = sizeof(int) * max;
- vclock_index = kzalloc(size, GFP_KERNEL);
+ vclock_index = kcalloc(max, sizeof(int), GFP_KERNEL);
if (!vclock_index) {
err = -ENOMEM;
goto out;
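
The ptp_sysfs hunk is the standard overflow-safe allocation cleanup: kcalloc() checks the n * size multiplication and returns NULL on overflow rather than allocating a short buffer. A minimal before/after sketch (helper names are hypothetical):

    #include <linux/slab.h>

    /* Before: if max is user-controlled, sizeof(int) * max can wrap around
     * and kzalloc() may then succeed with a buffer that is too small.
     */
    static int *alloc_index_risky(unsigned int max)
    {
            return kzalloc(sizeof(int) * max, GFP_KERNEL);
    }

    /* After: kcalloc() performs the same zeroed allocation but returns NULL
     * when the multiplication would overflow.
     */
    static int *alloc_index_safe(unsigned int max)
    {
            return kcalloc(max, sizeof(int), GFP_KERNEL);
    }
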
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index a2f231d13a9f..8bae3fd2b330 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -321,22 +321,30 @@ static int stm32_pwm_config(struct stm32_pwm *priv, unsigned int ch,
* First we need to find the minimal value for prescaler such that
*
* period_ns * clkrate
- * ------------------------------
+ * ------------------------------ < max_arr + 1
* NSEC_PER_SEC * (prescaler + 1)
*
- * isn't bigger than max_arr.
+ * This equation is equivalent to
+ *
+ * period_ns * clkrate
+ * ---------------------------- < prescaler + 1
+ * NSEC_PER_SEC * (max_arr + 1)
+ *
+ * Using integer division and knowing that the right hand side is
+ * integer, this is further equivalent to
+ *
+ * (period_ns * clkrate) // (NSEC_PER_SEC * (max_arr + 1)) ≤ prescaler
*/
prescaler = mul_u64_u64_div_u64(period_ns, clk_get_rate(priv->clk),
- (u64)NSEC_PER_SEC * priv->max_arr);
- if (prescaler > 0)
- prescaler -= 1;
-
+ (u64)NSEC_PER_SEC * ((u64)priv->max_arr + 1));
if (prescaler > MAX_TIM_PSC)
return -EINVAL;
prd = mul_u64_u64_div_u64(period_ns, clk_get_rate(priv->clk),
(u64)NSEC_PER_SEC * (prescaler + 1));
+ if (!prd)
+ return -EINVAL;
/*
* All channels share the same prescaler and counter so when two
@@ -673,7 +681,8 @@ static int stm32_pwm_probe(struct platform_device *pdev)
* .apply() won't overflow.
*/
if (clk_get_rate(priv->clk) > 1000000000)
- return dev_err_probe(dev, -EINVAL, "Failed to lock clock\n");
+ return dev_err_probe(dev, -EINVAL, "Clock freq too high (%lu)\n",
+ clk_get_rate(priv->clk));
chip->ops = &stm32pwm_ops;
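
The reworked pwm-stm32 comment can be sanity-checked numerically. Below is a plain userspace C sketch, with __int128 standing in for the kernel's mul_u64_u64_div_u64() 128-bit intermediate and made-up clock/period values; it picks the minimal prescaler such that the computed period count fits the auto-reload register:

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Userspace stand-in for the kernel's mul_u64_u64_div_u64():
     * a * b / c with a 128-bit intermediate, so the product cannot overflow.
     */
    static uint64_t mul_div(uint64_t a, uint64_t b, uint64_t c)
    {
            return (uint64_t)((unsigned __int128)a * b / c);
    }

    int main(void)
    {
            uint64_t clkrate = 90000000;   /* example: 90 MHz timer clock */
            uint64_t period_ns = 1000000;  /* example: 1 ms requested period */
            uint64_t max_arr = 0xffff;     /* 16-bit auto-reload register */

            /* (period_ns * clkrate) // (NSEC_PER_SEC * (max_arr + 1)) <= prescaler */
            uint64_t prescaler = mul_div(period_ns, clkrate,
                                         NSEC_PER_SEC * (max_arr + 1));
            uint64_t prd = mul_div(period_ns, clkrate,
                                   NSEC_PER_SEC * (prescaler + 1));

            /* Here: prescaler = 1, prd = 45000, and prd <= max_arr + 1 holds. */
            printf("prescaler=%llu prd=%llu\n",
                   (unsigned long long)prescaler, (unsigned long long)prd);
            return 0;
    }
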
diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index 34fcdd82b2ea..f3c447ecdc3b 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -140,7 +140,7 @@
#define AXP717_DCDC1_NUM_VOLTAGES 88
#define AXP717_DCDC2_NUM_VOLTAGES 107
-#define AXP717_DCDC3_NUM_VOLTAGES 104
+#define AXP717_DCDC3_NUM_VOLTAGES 103
#define AXP717_DCDC_V_OUT_MASK GENMASK(6, 0)
#define AXP717_LDO_V_OUT_MASK GENMASK(4, 0)
@@ -763,10 +763,15 @@ static const struct linear_range axp717_dcdc1_ranges[] = {
REGULATOR_LINEAR_RANGE(1220000, 71, 87, 20000),
};
+/*
+ * The manual says that the last voltage is 3.4V, encoded as 0b1101011 (107),
+ * but every other method proves that this is wrong, so it's really 106 that
+ * programs the final 3.4V: 1.6V + (106 - 88) * 0.1V = 3.4V.
+ */
static const struct linear_range axp717_dcdc2_ranges[] = {
REGULATOR_LINEAR_RANGE(500000, 0, 70, 10000),
REGULATOR_LINEAR_RANGE(1220000, 71, 87, 20000),
- REGULATOR_LINEAR_RANGE(1600000, 88, 107, 100000),
+ REGULATOR_LINEAR_RANGE(1600000, 88, 106, 100000),
};
static const struct linear_range axp717_dcdc3_ranges[] = {
@@ -790,40 +795,40 @@ static const struct regulator_desc axp717_regulators[] = {
AXP_DESC(AXP717, DCDC4, "dcdc4", "vin4", 1000, 3700, 100,
AXP717_DCDC4_CONTROL, AXP717_DCDC_V_OUT_MASK,
AXP717_DCDC_OUTPUT_CONTROL, BIT(3)),
- AXP_DESC(AXP717, ALDO1, "aldo1", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, ALDO1, "aldo1", "aldoin", 500, 3500, 100,
AXP717_ALDO1_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(0)),
- AXP_DESC(AXP717, ALDO2, "aldo2", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, ALDO2, "aldo2", "aldoin", 500, 3500, 100,
AXP717_ALDO2_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(1)),
- AXP_DESC(AXP717, ALDO3, "aldo3", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, ALDO3, "aldo3", "aldoin", 500, 3500, 100,
AXP717_ALDO3_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(2)),
- AXP_DESC(AXP717, ALDO4, "aldo4", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, ALDO4, "aldo4", "aldoin", 500, 3500, 100,
AXP717_ALDO4_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(3)),
- AXP_DESC(AXP717, BLDO1, "bldo1", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, BLDO1, "bldo1", "bldoin", 500, 3500, 100,
AXP717_BLDO1_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(4)),
- AXP_DESC(AXP717, BLDO2, "bldo2", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, BLDO2, "bldo2", "bldoin", 500, 3500, 100,
AXP717_BLDO2_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(5)),
- AXP_DESC(AXP717, BLDO3, "bldo3", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, BLDO3, "bldo3", "bldoin", 500, 3500, 100,
AXP717_BLDO3_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(6)),
- AXP_DESC(AXP717, BLDO4, "bldo4", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, BLDO4, "bldo4", "bldoin", 500, 3500, 100,
AXP717_BLDO4_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO0_OUTPUT_CONTROL, BIT(7)),
- AXP_DESC(AXP717, CLDO1, "cldo1", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, CLDO1, "cldo1", "cldoin", 500, 3500, 100,
AXP717_CLDO1_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO1_OUTPUT_CONTROL, BIT(0)),
- AXP_DESC(AXP717, CLDO2, "cldo2", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, CLDO2, "cldo2", "cldoin", 500, 3500, 100,
AXP717_CLDO2_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO1_OUTPUT_CONTROL, BIT(1)),
- AXP_DESC(AXP717, CLDO3, "cldo3", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, CLDO3, "cldo3", "cldoin", 500, 3500, 100,
AXP717_CLDO3_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO1_OUTPUT_CONTROL, BIT(2)),
- AXP_DESC(AXP717, CLDO4, "cldo4", "vin1", 500, 3500, 100,
+ AXP_DESC(AXP717, CLDO4, "cldo4", "cldoin", 500, 3500, 100,
AXP717_CLDO4_CONTROL, AXP717_LDO_V_OUT_MASK,
AXP717_LDO1_OUTPUT_CONTROL, BIT(3)),
AXP_DESC(AXP717, CPUSLDO, "cpusldo", "vin1", 500, 1400, 50,
diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c
index 26192d55a685..79fbb45297f6 100644
--- a/drivers/regulator/bd71815-regulator.c
+++ b/drivers/regulator/bd71815-regulator.c
@@ -256,7 +256,7 @@ static int buck12_set_hw_dvs_levels(struct device_node *np,
* 10: 2.50mV/usec 10mV 4uS
* 11: 1.25mV/usec 10mV 8uS
*/
-static const unsigned int bd7181x_ramp_table[] = { 1250, 2500, 5000, 10000 };
+static const unsigned int bd7181x_ramp_table[] = { 10000, 5000, 2500, 1250 };
static int bd7181x_led_set_current_limit(struct regulator_dev *rdev,
int min_uA, int max_uA)
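
The bd71815 fix is purely about index-to-register correspondence: the regulator core interprets ramp_delay_table[i] as the delay selected by register value i, so the table must be listed in register order (0b00 -> 10000 uV/us down to 0b11 -> 1250 uV/us, per the comment above), not sorted ascending. A sketch of how such a table is typically wired into a descriptor, with made-up register and mask values:

    #include <linux/kernel.h>
    #include <linux/regulator/driver.h>

    /* Index == register value: 0b00 -> 10000 uV/us, ..., 0b11 -> 1250 uV/us */
    static const unsigned int example_ramp_table[] = { 10000, 5000, 2500, 1250 };

    /* Hypothetical descriptor fragment (register and mask values made up).
     * The regulator core looks up the requested delay in ramp_delay_table[]
     * and writes the matching *index* through ramp_mask, which is why the
     * table has to mirror the register encoding.
     */
    static const struct regulator_desc example_desc = {
            .name                   = "example-buck",
            .ramp_delay_table       = example_ramp_table,
            .n_ramp_values          = ARRAY_SIZE(example_ramp_table),
            .ramp_reg               = 0x04,
            .ramp_mask              = 0x3,
    };
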
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 5794f4e9dd52..844e9587a880 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3347,6 +3347,7 @@ struct regmap *regulator_get_regmap(struct regulator *regulator)
return map ? map : ERR_PTR(-EOPNOTSUPP);
}
+EXPORT_SYMBOL_GPL(regulator_get_regmap);
/**
* regulator_get_hardware_vsel_register - get the HW voltage selector register
diff --git a/drivers/regulator/tps6594-regulator.c b/drivers/regulator/tps6594-regulator.c
index 4a859f4c0f83..ac53792e3fed 100644
--- a/drivers/regulator/tps6594-regulator.c
+++ b/drivers/regulator/tps6594-regulator.c
@@ -653,18 +653,14 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
}
}
- if (tps->chip_id == LP8764) {
- nr_buck = ARRAY_SIZE(buck_regs);
- nr_ldo = 0;
- nr_types = REGS_INT_NB;
- } else if (tps->chip_id == TPS65224) {
+ if (tps->chip_id == TPS65224) {
nr_buck = ARRAY_SIZE(tps65224_buck_regs);
nr_ldo = ARRAY_SIZE(tps65224_ldo_regs);
- nr_types = REGS_INT_NB;
+ nr_types = TPS65224_REGS_INT_NB;
} else {
nr_buck = ARRAY_SIZE(buck_regs);
- nr_ldo = ARRAY_SIZE(tps6594_ldo_regs);
- nr_types = TPS65224_REGS_INT_NB;
+ nr_ldo = (tps->chip_id == LP8764) ? 0 : ARRAY_SIZE(tps6594_ldo_regs);
+ nr_types = REGS_INT_NB;
}
reg_irq_nb = nr_types * (nr_buck + nr_ldo);
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 7112f5932609..6bb5d9e372e4 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -68,6 +68,7 @@ config RESET_BRCMSTB_RESCAL
config RESET_GPIO
tristate "GPIO reset controller"
+ depends on GPIOLIB
help
This enables a generic reset controller for resets attached via
GPIOs. Typically for OF platforms this driver expects "reset-gpios"
diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c
index 5c3267acd2b1..65aa5ff5ed82 100644
--- a/drivers/reset/hisilicon/hi6220_reset.c
+++ b/drivers/reset/hisilicon/hi6220_reset.c
@@ -219,4 +219,5 @@ static int __init hi6220_reset_init(void)
postcore_initcall(hi6220_reset_init);
+MODULE_DESCRIPTION("Hisilicon Hi6220 reset controller driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 2f16f543079b..a76c6af9ea63 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4906,7 +4906,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
ccw++;
if (dst) {
if (ccw->flags & CCW_FLAG_IDA)
- cda = *((char **)dma32_to_virt(ccw->cda));
+ cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda)));
else
cda = dma32_to_virt(ccw->cda);
if (dst != cda) {
@@ -5525,7 +5525,7 @@ dasd_eckd_dump_ccw_range(struct dasd_device *device, struct ccw1 *from,
/* get pointer to data (consider IDALs) */
if (from->flags & CCW_FLAG_IDA)
- datap = (char *)*((addr_t *)dma32_to_virt(from->cda));
+ datap = dma64_to_virt(*((dma64_t *)dma32_to_virt(from->cda)));
else
datap = dma32_to_virt(from->cda);
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 361e9bd75257..9f2023a077c2 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -585,7 +585,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
ccw++;
if (dst) {
if (ccw->flags & CCW_FLAG_IDA)
- cda = *((char **)dma32_to_virt(ccw->cda));
+ cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda)));
else
cda = dma32_to_virt(ccw->cda);
if (dst != cda) {
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 4533dd055ca8..1aa426b1dedd 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -68,7 +68,6 @@ int dasd_gendisk_alloc(struct dasd_block *block)
blk_mq_free_tag_set(&block->tag_set);
return PTR_ERR(gdp);
}
- blk_queue_flag_set(QUEUE_FLAG_NONROT, gdp->queue);
/* Initialize gendisk structure. */
gdp->major = DASD_MAJOR;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6d1689a2717e..d5a5d11ae0dc 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -548,6 +548,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
{
struct queue_limits lim = {
.logical_block_size = 4096,
+ .features = BLK_FEAT_DAX,
};
int rc, i, j, num_of_segments;
struct dcssblk_dev_info *dev_info;
@@ -643,7 +644,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->gd->fops = &dcssblk_devops;
dev_info->gd->private_data = dev_info;
dev_info->gd->flags |= GENHD_FL_NO_PART;
- blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);
seg_byte_size = (dev_info->end - dev_info->start + 1);
set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 1d456a5a3bfb..3fcfe029db1b 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -439,7 +439,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
.logical_block_size = 1 << 12,
};
unsigned int devindex;
- struct request_queue *rq;
int len, ret;
lim.max_segments = min(scmdev->nr_max_block,
@@ -474,10 +473,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
ret = PTR_ERR(bdev->gendisk);
goto out_tag;
}
- rq = bdev->rq = bdev->gendisk->queue;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);
-
bdev->gendisk->private_data = scmdev;
bdev->gendisk->fops = &scm_blk_devops;
bdev->gendisk->major = scm_major;
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index d53ee34d398f..fbe29cabcbb8 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -1293,6 +1293,7 @@ sclp_init(void)
fail_unregister_reboot_notifier:
unregister_reboot_notifier(&sclp_reboot_notifier);
fail_init_state_uninitialized:
+ list_del(&sclp_state_change_event.list);
sclp_init_state = sclp_init_state_uninitialized;
free_page((unsigned long) sclp_read_sccb);
free_page((unsigned long) sclp_init_sccb);
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 6e5c508b1e07..5f6e10225627 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -490,13 +490,14 @@ static int ccwchain_fetch_tic(struct ccw1 *ccw,
struct channel_program *cp)
{
struct ccwchain *iter;
- u32 cda, ccw_head;
+ u32 offset, ccw_head;
list_for_each_entry(iter, &cp->ccwchain_list, next) {
ccw_head = iter->ch_iova;
if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
- cda = (u64)iter->ch_ccw + dma32_to_u32(ccw->cda) - ccw_head;
- ccw->cda = u32_to_dma32(cda);
+ /* Calculate offset of TIC target */
+ offset = dma32_to_u32(ccw->cda) - ccw_head;
+ ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset);
return 0;
}
}
@@ -914,7 +915,7 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
* in the ioctl directly. Path status changes etc.
*/
list_for_each_entry(chain, &cp->ccwchain_list, next) {
- ccw_head = (u32)(u64)chain->ch_ccw;
+ ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw));
/*
* On successful execution, cpa points just beyond the end
* of the chain.
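
The vfio_ccw hunks replace ad-hoc u64 casts with the typed dma32 helpers, but the underlying idea is simple pointer arithmetic: a TIC's guest address is converted to an offset inside the matched guest chain, and that offset is re-applied to the host copy of the chain. A distilled sketch of just that translation, with hypothetical types:

    #include <stdint.h>

    /* Hypothetical, simplified view of one translated chain segment. */
    struct chain_seg {
            uint32_t guest_iova;    /* guest address of the original CCWs */
            uint32_t len;           /* length of the segment in bytes */
            void *host_ccws;        /* host copy of those CCWs */
    };

    /* Retarget a TIC that points at guest_cda: keep only the offset within
     * the guest segment and rebase it onto the host copy -- the same
     * "offset = dma32_to_u32(ccw->cda) - ccw_head" computation as above.
     */
    static void *tic_target(const struct chain_seg *seg, uint32_t guest_cda)
    {
            uint32_t offset = guest_cda - seg->guest_iova;

            return (char *)seg->host_ccws + offset;
    }
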
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index d7569f395559..d6491fc84e8c 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -698,6 +698,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
dma64_t *indicatorp = NULL;
int ret, i, queue_idx = 0;
struct ccw1 *ccw;
+ dma32_t indicatorp_dma = 0;
ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw), NULL);
if (!ccw)
@@ -725,7 +726,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
*/
indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
sizeof(*indicatorp),
- &ccw->cda);
+ &indicatorp_dma);
if (!indicatorp)
goto out;
*indicatorp = indicators_dma(vcdev);
@@ -735,6 +736,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
/* no error, just fall back to legacy interrupts */
vcdev->is_thinint = false;
}
+ ccw->cda = indicatorp_dma;
if (!vcdev->is_thinint) {
/* Register queue indicators with host. */
*indicators(vcdev) = 0;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 065db86d6021..37c24ffea65c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -82,7 +82,6 @@ comment "SCSI support type (disk, tape, CD-ROM)"
config BLK_DEV_SD
tristate "SCSI disk support"
depends on SCSI
- select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
help
If you want to use SCSI hard disks, Fibre Channel disks,
Serial ATA (SATA) or Parallel ATA (PATA) hard disks,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 60688f18fac6..c708e1059638 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -1057,15 +1057,15 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
return 0;
}
-static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
+static int iscsi_sw_tcp_device_configure(struct scsi_device *sdev,
+ struct queue_limits *lim)
{
struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host);
struct iscsi_session *session = tcp_sw_host->session;
struct iscsi_conn *conn = session->leadconn;
if (conn->datadgst_en)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
- sdev->request_queue);
+ lim->features |= BLK_FEAT_STABLE_WRITES;
return 0;
}
@@ -1083,7 +1083,7 @@ static const struct scsi_host_template iscsi_sw_tcp_sht = {
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.dma_boundary = PAGE_SIZE - 1,
- .slave_configure = iscsi_sw_tcp_slave_configure,
+ .device_configure = iscsi_sw_tcp_device_configure,
.proc_name = "iscsi_tcp",
.this_id = -1,
.track_queue_depth = 1,
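
The iscsi_tcp hunk is one instance of the tree-wide move from poking request_queue flags in .slave_configure to declaring features in the queue_limits passed to .device_configure, which the block layer then applies when the device is set up. A minimal sketch of the callback shape, with a hypothetical condition standing in for conn->datadgst_en:

    #include <linux/blkdev.h>
    #include <scsi/scsi_device.h>
    #include <scsi/scsi_host.h>

    static bool example_stable_writes;      /* stand-in for conn->datadgst_en */

    /* Hypothetical .device_configure callback: instead of calling
     * blk_queue_flag_set() on a live queue, the driver ORs feature bits into
     * the limits it was handed, and the SCSI midlayer commits them.
     */
    static int example_device_configure(struct scsi_device *sdev,
                                        struct queue_limits *lim)
    {
            if (example_stable_writes)
                    lim->features |= BLK_FEAT_STABLE_WRITES;

            return 0;
    }
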
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 4c69fc63c119..cbbe43d8ef87 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -610,15 +610,15 @@ int sas_ata_init(struct domain_device *found_dev)
rc = ata_sas_tport_add(ata_host->dev, ap);
if (rc)
- goto destroy_port;
+ goto free_port;
found_dev->sata_dev.ata_host = ata_host;
found_dev->sata_dev.ap = ap;
return 0;
-destroy_port:
- kfree(ap);
+free_port:
+ ata_port_free(ap);
free_host:
ata_host_put(ata_host);
return rc;
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 8fb7c41c0962..48d975c6dbf2 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref)
if (dev_is_sata(dev) && dev->sata_dev.ap) {
ata_sas_tport_delete(dev->sata_dev.ap);
- kfree(dev->sata_dev.ap);
+ ata_port_free(dev->sata_dev.ap);
ata_host_put(dev->sata_dev.ata_host);
dev->sata_dev.ata_host = NULL;
dev->sata_dev.ap = NULL;
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 85948963fb97..03d6ec1eb970 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -145,6 +145,20 @@ static inline void sas_fail_probe(struct domain_device *dev, const char *func, i
func, dev->parent ? "exp-attached" :
"direct-attached",
SAS_ADDR(dev->sas_addr), err);
+
+ /*
+ * If the device probe failed, the expander phy attached address
+ * needs to be reset so that the phy will not be treated as flutter
+ * in the next revalidation
+ */
+ if (dev->parent && !dev_is_expander(dev->dev_type)) {
+ struct sas_phy *phy = dev->phy;
+ struct domain_device *parent = dev->parent;
+ struct ex_phy *ex_phy = &parent->ex_dev.ex_phy[phy->number];
+
+ memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
+ }
+
sas_unregister_dev(dev->port, dev);
}
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 5297cacc8beb..0cef5d089f34 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1363,6 +1363,16 @@ lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport,
atomic_inc(&lpfc_nvmet->xmt_ls_abort);
}
+static int
+lpfc_nvmet_host_traddr(void *hosthandle, u64 *wwnn, u64 *wwpn)
+{
+ struct lpfc_nodelist *ndlp = hosthandle;
+
+ *wwnn = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
+ *wwpn = wwn_to_u64(ndlp->nlp_portname.u.wwn);
+ return 0;
+}
+
static void
lpfc_nvmet_host_release(void *hosthandle)
{
@@ -1413,6 +1423,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
.ls_req = lpfc_nvmet_ls_req,
.ls_abort = lpfc_nvmet_ls_abort,
.host_release = lpfc_nvmet_host_release,
+ .host_traddr = lpfc_nvmet_host_traddr,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 88acefbf9aea..6c79c350a4d5 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1981,8 +1981,6 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev,
lim->max_hw_sectors = max_io_size / 512;
lim->virt_boundary_mask = mr_nvme_pg_size - 1;
-
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue);
}
/*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 870ec2cb4af4..97c2472cd434 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2680,12 +2680,6 @@ scsih_device_configure(struct scsi_device *sdev, struct queue_limits *lim)
pcie_device_put(pcie_device);
spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
mpt3sas_scsih_change_queue_depth(sdev, qdepth);
- /* Enable QUEUE_FLAG_NOMERGES flag, so that IOs won't be
- ** merged and can eliminate holes created during merging
- ** operation.
- **/
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES,
- sdev->request_queue);
lim->virt_boundary_mask = ioc->page_size - 1;
return 0;
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index acf0592d63da..a9d8a9c62663 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -69,6 +69,8 @@ static const char *sdebug_version_date = "20210520";
/* Additional Sense Code (ASC) */
#define NO_ADDITIONAL_SENSE 0x0
+#define OVERLAP_ATOMIC_COMMAND_ASC 0x0
+#define OVERLAP_ATOMIC_COMMAND_ASCQ 0x23
#define LOGICAL_UNIT_NOT_READY 0x4
#define LOGICAL_UNIT_COMMUNICATION_FAILURE 0x8
#define UNRECOVERED_READ_ERR 0x11
@@ -103,6 +105,7 @@ static const char *sdebug_version_date = "20210520";
#define READ_BOUNDARY_ASCQ 0x7
#define ATTEMPT_ACCESS_GAP 0x9
#define INSUFF_ZONE_ASCQ 0xe
+/* see drivers/scsi/sense_codes.h */
/* Additional Sense Code Qualifier (ASCQ) */
#define ACK_NAK_TO 0x3
@@ -152,6 +155,12 @@ static const char *sdebug_version_date = "20210520";
#define DEF_VIRTUAL_GB 0
#define DEF_VPD_USE_HOSTNO 1
#define DEF_WRITESAME_LENGTH 0xFFFF
+#define DEF_ATOMIC_WR 0
+#define DEF_ATOMIC_WR_MAX_LENGTH 8192
+#define DEF_ATOMIC_WR_ALIGN 2
+#define DEF_ATOMIC_WR_GRAN 2
+#define DEF_ATOMIC_WR_MAX_LENGTH_BNDRY (DEF_ATOMIC_WR_MAX_LENGTH)
+#define DEF_ATOMIC_WR_MAX_BNDRY 128
#define DEF_STRICT 0
#define DEF_STATISTICS false
#define DEF_SUBMIT_QUEUES 1
@@ -374,7 +383,9 @@ struct sdebug_host_info {
/* There is an xarray of pointers to this struct's objects, one per host */
struct sdeb_store_info {
- rwlock_t macc_lck; /* for atomic media access on this store */
+ rwlock_t macc_data_lck; /* for media data access on this store */
+ rwlock_t macc_meta_lck; /* for atomic media meta access on this store */
+ rwlock_t macc_sector_lck; /* per-sector media data access on this store */
u8 *storep; /* user data storage (ram) */
struct t10_pi_tuple *dif_storep; /* protection info */
void *map_storep; /* provisioning map */
@@ -398,12 +409,20 @@ struct sdebug_defer {
enum sdeb_defer_type defer_t;
};
+struct sdebug_device_access_info {
+ bool atomic_write;
+ u64 lba;
+ u32 num;
+ struct scsi_cmnd *self;
+};
+
struct sdebug_queued_cmd {
/* corresponding bit set in in_use_bm[] in owning struct sdebug_queue
* instance indicates this slot is in use.
*/
struct sdebug_defer sd_dp;
struct scsi_cmnd *scmd;
+ struct sdebug_device_access_info *i;
};
struct sdebug_scsi_cmd {
@@ -463,7 +482,8 @@ enum sdeb_opcode_index {
SDEB_I_PRE_FETCH = 29, /* 10, 16 */
SDEB_I_ZONE_OUT = 30, /* 0x94+SA; includes no data xfer */
SDEB_I_ZONE_IN = 31, /* 0x95+SA; all have data-in */
- SDEB_I_LAST_ELEM_P1 = 32, /* keep this last (previous + 1) */
+ SDEB_I_ATOMIC_WRITE_16 = 32,
+ SDEB_I_LAST_ELEM_P1 = 33, /* keep this last (previous + 1) */
};
@@ -497,7 +517,8 @@ static const unsigned char opcode_ind_arr[256] = {
0, 0, 0, SDEB_I_VERIFY,
SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME,
SDEB_I_ZONE_OUT, SDEB_I_ZONE_IN, 0, 0,
- 0, 0, 0, 0, 0, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16,
+ 0, 0, 0, 0,
+ SDEB_I_ATOMIC_WRITE_16, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16,
/* 0xa0; 0xa0->0xbf: 12 byte cdbs */
SDEB_I_REPORT_LUNS, SDEB_I_ATA_PT, 0, SDEB_I_MAINT_IN,
SDEB_I_MAINT_OUT, 0, 0, 0,
@@ -547,6 +568,7 @@ static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_pre_fetch(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_report_zones(struct scsi_cmnd *, struct sdebug_dev_info *);
+static int resp_atomic_write(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_open_zone(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_close_zone(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_finish_zone(struct scsi_cmnd *, struct sdebug_dev_info *);
@@ -788,6 +810,11 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = {
resp_report_zones, zone_in_iarr, /* ZONE_IN(16), REPORT ZONES) */
{16, 0x0 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xc7} },
+/* 31 */
+ {0, 0x0, 0x0, F_D_OUT | FF_MEDIA_IO,
+ resp_atomic_write, NULL, /* ATOMIC WRITE 16 */
+ {16, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} },
/* sentinel */
{0xff, 0, 0, 0, NULL, NULL, /* terminating element */
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
@@ -835,6 +862,13 @@ static unsigned int sdebug_unmap_granularity = DEF_UNMAP_GRANULARITY;
static unsigned int sdebug_unmap_max_blocks = DEF_UNMAP_MAX_BLOCKS;
static unsigned int sdebug_unmap_max_desc = DEF_UNMAP_MAX_DESC;
static unsigned int sdebug_write_same_length = DEF_WRITESAME_LENGTH;
+static unsigned int sdebug_atomic_wr = DEF_ATOMIC_WR;
+static unsigned int sdebug_atomic_wr_max_length = DEF_ATOMIC_WR_MAX_LENGTH;
+static unsigned int sdebug_atomic_wr_align = DEF_ATOMIC_WR_ALIGN;
+static unsigned int sdebug_atomic_wr_gran = DEF_ATOMIC_WR_GRAN;
+static unsigned int sdebug_atomic_wr_max_length_bndry =
+ DEF_ATOMIC_WR_MAX_LENGTH_BNDRY;
+static unsigned int sdebug_atomic_wr_max_bndry = DEF_ATOMIC_WR_MAX_BNDRY;
static int sdebug_uuid_ctl = DEF_UUID_CTL;
static bool sdebug_random = DEF_RANDOM;
static bool sdebug_per_host_store = DEF_PER_HOST_STORE;
@@ -926,6 +960,7 @@ static const int device_qfull_result =
static const int condition_met_result = SAM_STAT_CONDITION_MET;
static struct dentry *sdebug_debugfs_root;
+static ASYNC_DOMAIN_EXCLUSIVE(sdebug_async_domain);
static void sdebug_err_free(struct rcu_head *head)
{
@@ -1148,6 +1183,8 @@ static int sdebug_target_alloc(struct scsi_target *starget)
if (!targetip)
return -ENOMEM;
+ async_synchronize_full_domain(&sdebug_async_domain);
+
targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
sdebug_debugfs_root);
@@ -1174,7 +1211,8 @@ static void sdebug_target_destroy(struct scsi_target *starget)
targetip = (struct sdebug_target_info *)starget->hostdata;
if (targetip) {
starget->hostdata = NULL;
- async_schedule(sdebug_tartget_cleanup_async, targetip);
+ async_schedule_domain(sdebug_tartget_cleanup_async, targetip,
+ &sdebug_async_domain);
}
}
@@ -1188,6 +1226,11 @@ static inline bool scsi_debug_lbp(void)
(sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10);
}
+static inline bool scsi_debug_atomic_write(void)
+{
+ return sdebug_fake_rw == 0 && sdebug_atomic_wr;
+}
+
static void *lba2fake_store(struct sdeb_store_info *sip,
unsigned long long lba)
{
@@ -1815,6 +1858,14 @@ static int inquiry_vpd_b0(unsigned char *arr)
/* Maximum WRITE SAME Length */
put_unaligned_be64(sdebug_write_same_length, &arr[32]);
+ if (sdebug_atomic_wr) {
+ put_unaligned_be32(sdebug_atomic_wr_max_length, &arr[40]);
+ put_unaligned_be32(sdebug_atomic_wr_align, &arr[44]);
+ put_unaligned_be32(sdebug_atomic_wr_gran, &arr[48]);
+ put_unaligned_be32(sdebug_atomic_wr_max_length_bndry, &arr[52]);
+ put_unaligned_be32(sdebug_atomic_wr_max_bndry, &arr[56]);
+ }
+
return 0x3c; /* Mandatory page length for Logical Block Provisioning */
}
@@ -3377,16 +3428,238 @@ static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip,
return xa_load(per_store_ap, devip->sdbg_host->si_idx);
}
+static inline void
+sdeb_read_lock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __acquire(lock);
+ else
+ read_lock(lock);
+}
+
+static inline void
+sdeb_read_unlock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __release(lock);
+ else
+ read_unlock(lock);
+}
+
+static inline void
+sdeb_write_lock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __acquire(lock);
+ else
+ write_lock(lock);
+}
+
+static inline void
+sdeb_write_unlock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __release(lock);
+ else
+ write_unlock(lock);
+}
+
+static inline void
+sdeb_data_read_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_lock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_read_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_unlock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_write_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_lock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_write_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_unlock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_sector_read_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_lock(&sip->macc_sector_lck);
+}
+
+static inline void
+sdeb_data_sector_read_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_unlock(&sip->macc_sector_lck);
+}
+
+static inline void
+sdeb_data_sector_write_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_lock(&sip->macc_sector_lck);
+}
+
+static inline void
+sdeb_data_sector_write_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_unlock(&sip->macc_sector_lck);
+}
+
+/*
+ * Atomic locking:
+ * We simplify the atomic model to allow only 1x atomic write and many non-
+ * atomic reads or writes for all LBAs.
+ *
+ * An RW lock has a similar behaviour:
+ * Only 1x writer and many readers.
+ *
+ * So use an RW lock for per-device read and write locking:
+ * An atomic access grabs the lock as a writer and non-atomic grabs the lock
+ * as a reader.
+ */
+
+static inline void
+sdeb_data_lock(struct sdeb_store_info *sip, bool atomic)
+{
+ if (atomic)
+ sdeb_data_write_lock(sip);
+ else
+ sdeb_data_read_lock(sip);
+}
+
+static inline void
+sdeb_data_unlock(struct sdeb_store_info *sip, bool atomic)
+{
+ if (atomic)
+ sdeb_data_write_unlock(sip);
+ else
+ sdeb_data_read_unlock(sip);
+}
+
+/* Allow many reads but only 1x write per sector */
+static inline void
+sdeb_data_sector_lock(struct sdeb_store_info *sip, bool do_write)
+{
+ if (do_write)
+ sdeb_data_sector_write_lock(sip);
+ else
+ sdeb_data_sector_read_lock(sip);
+}
+
+static inline void
+sdeb_data_sector_unlock(struct sdeb_store_info *sip, bool do_write)
+{
+ if (do_write)
+ sdeb_data_sector_write_unlock(sip);
+ else
+ sdeb_data_sector_read_unlock(sip);
+}
+
+static inline void
+sdeb_meta_read_lock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __acquire(&sip->macc_meta_lck);
+ else
+ __acquire(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ read_lock(&sip->macc_meta_lck);
+ else
+ read_lock(&sdeb_fake_rw_lck);
+ }
+}
+
+static inline void
+sdeb_meta_read_unlock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __release(&sip->macc_meta_lck);
+ else
+ __release(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ read_unlock(&sip->macc_meta_lck);
+ else
+ read_unlock(&sdeb_fake_rw_lck);
+ }
+}
+
+static inline void
+sdeb_meta_write_lock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __acquire(&sip->macc_meta_lck);
+ else
+ __acquire(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ write_lock(&sip->macc_meta_lck);
+ else
+ write_lock(&sdeb_fake_rw_lck);
+ }
+}
+
+static inline void
+sdeb_meta_write_unlock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __release(&sip->macc_meta_lck);
+ else
+ __release(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ write_unlock(&sip->macc_meta_lck);
+ else
+ write_unlock(&sdeb_fake_rw_lck);
+ }
+}
+
/* Returns number of bytes copied or -1 if error. */
static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp,
- u32 sg_skip, u64 lba, u32 num, bool do_write,
- u8 group_number)
+ u32 sg_skip, u64 lba, u32 num, u8 group_number,
+ bool do_write, bool atomic)
{
int ret;
- u64 block, rest = 0;
+ u64 block;
enum dma_data_direction dir;
struct scsi_data_buffer *sdb = &scp->sdb;
u8 *fsp;
+ int i;
+
+ /*
+ * Even though reads are inherently atomic (in this driver), we expect
+ * the atomic flag only for writes.
+ */
+ if (!do_write && atomic)
+ return -1;
if (do_write) {
dir = DMA_TO_DEVICE;
@@ -3406,21 +3679,26 @@ static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp,
fsp = sip->storep;
block = do_div(lba, sdebug_store_sectors);
- if (block + num > sdebug_store_sectors)
- rest = block + num - sdebug_store_sectors;
- ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents,
+ /* Only allow 1x atomic write or multiple non-atomic writes at any given time */
+ sdeb_data_lock(sip, atomic);
+ for (i = 0; i < num; i++) {
+ /* We shouldn't need to lock for atomic writes, but do it anyway */
+ sdeb_data_sector_lock(sip, do_write);
+ ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents,
fsp + (block * sdebug_sector_size),
- (num - rest) * sdebug_sector_size, sg_skip, do_write);
- if (ret != (num - rest) * sdebug_sector_size)
- return ret;
-
- if (rest) {
- ret += sg_copy_buffer(sdb->table.sgl, sdb->table.nents,
- fsp, rest * sdebug_sector_size,
- sg_skip + ((num - rest) * sdebug_sector_size),
- do_write);
+ sdebug_sector_size, sg_skip, do_write);
+ sdeb_data_sector_unlock(sip, do_write);
+ if (ret != sdebug_sector_size) {
+ ret += (i * sdebug_sector_size);
+ break;
+ }
+ sg_skip += sdebug_sector_size;
+ if (++block >= sdebug_store_sectors)
+ block = 0;
}
+ ret = num * sdebug_sector_size;
+ sdeb_data_unlock(sip, atomic);
return ret;
}
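
The rewritten do_device_access() enforces the model described in the comment above with ordinary reader/writer semantics: an atomic write takes the per-store data lock as a writer, everything else as a reader, and sector numbers past the end of the fake store wrap to 0. A userspace model of the same idea (pthread rwlock standing in for the kernel rwlock_t, sizes made up; the kernel version additionally serializes individual sectors with macc_sector_lck, which this sketch omits):

    #include <pthread.h>
    #include <stdint.h>
    #include <string.h>

    #define SECTOR_SIZE     512
    #define STORE_SECTORS   128

    static pthread_rwlock_t data_lck = PTHREAD_RWLOCK_INITIALIZER;
    static uint8_t store[STORE_SECTORS * SECTOR_SIZE];

    /* One atomic write excludes everything else (writer); any number of
     * non-atomic accesses run concurrently (readers). Sector numbers wrap,
     * mirroring the "if (++block >= sdebug_store_sectors) block = 0" hunk.
     */
    static void device_write(const uint8_t *buf, uint64_t lba, uint32_t num,
                             int atomic)
    {
            uint64_t block = lba % STORE_SECTORS;
            uint32_t i;

            if (atomic)
                    pthread_rwlock_wrlock(&data_lck);
            else
                    pthread_rwlock_rdlock(&data_lck);

            for (i = 0; i < num; i++) {
                    memcpy(store + block * SECTOR_SIZE,
                           buf + (uint64_t)i * SECTOR_SIZE, SECTOR_SIZE);
                    if (++block >= STORE_SECTORS)
                            block = 0;
            }

            pthread_rwlock_unlock(&data_lck);
    }
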
@@ -3596,70 +3874,6 @@ static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec,
return ret;
}
-static inline void
-sdeb_read_lock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __acquire(&sip->macc_lck);
- else
- __acquire(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- read_lock(&sip->macc_lck);
- else
- read_lock(&sdeb_fake_rw_lck);
- }
-}
-
-static inline void
-sdeb_read_unlock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __release(&sip->macc_lck);
- else
- __release(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- read_unlock(&sip->macc_lck);
- else
- read_unlock(&sdeb_fake_rw_lck);
- }
-}
-
-static inline void
-sdeb_write_lock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __acquire(&sip->macc_lck);
- else
- __acquire(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- write_lock(&sip->macc_lck);
- else
- write_lock(&sdeb_fake_rw_lck);
- }
-}
-
-static inline void
-sdeb_write_unlock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __release(&sip->macc_lck);
- else
- __release(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- write_unlock(&sip->macc_lck);
- else
- write_unlock(&sdeb_fake_rw_lck);
- }
-}
-
static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
{
bool check_prot;
@@ -3669,6 +3883,7 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
u64 lba;
struct sdeb_store_info *sip = devip2sip(devip, true);
u8 *cmd = scp->cmnd;
+ bool meta_data_locked = false;
switch (cmd[0]) {
case READ_16:
@@ -3727,6 +3942,10 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
atomic_set(&sdeb_inject_pending, 0);
}
+ /*
+ * When checking device access params, for reads we only check data
+ * versus what is set at init time, so no need to lock.
+ */
ret = check_device_access_params(scp, lba, num, false);
if (ret)
return ret;
@@ -3746,29 +3965,33 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return check_condition_result;
}
- sdeb_read_lock(sip);
+ if (sdebug_dev_is_zoned(devip) ||
+ (sdebug_dix && scsi_prot_sg_count(scp))) {
+ sdeb_meta_read_lock(sip);
+ meta_data_locked = true;
+ }
/* DIX + T10 DIF */
if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) {
switch (prot_verify_read(scp, lba, num, ei_lba)) {
case 1: /* Guard tag error */
if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1);
return check_condition_result;
} else if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) {
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1);
return illegal_condition_result;
}
break;
case 3: /* Reference tag error */
if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3);
return check_condition_result;
} else if (scp->prot_flags & SCSI_PROT_REF_CHECK) {
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3);
return illegal_condition_result;
}
@@ -3776,8 +3999,9 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
}
- ret = do_device_access(sip, scp, 0, lba, num, false, 0);
- sdeb_read_unlock(sip);
+ ret = do_device_access(sip, scp, 0, lba, num, 0, false, false);
+ if (meta_data_locked)
+ sdeb_meta_read_unlock(sip);
if (unlikely(ret == -1))
return DID_ERROR << 16;
@@ -3967,6 +4191,7 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
u64 lba;
struct sdeb_store_info *sip = devip2sip(devip, true);
u8 *cmd = scp->cmnd;
+ bool meta_data_locked = false;
switch (cmd[0]) {
case WRITE_16:
@@ -4025,10 +4250,17 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
"to DIF device\n");
}
- sdeb_write_lock(sip);
+ if (sdebug_dev_is_zoned(devip) ||
+ (sdebug_dix && scsi_prot_sg_count(scp)) ||
+ scsi_debug_lbp()) {
+ sdeb_meta_write_lock(sip);
+ meta_data_locked = true;
+ }
+
ret = check_device_access_params(scp, lba, num, true);
if (ret) {
- sdeb_write_unlock(sip);
+ if (meta_data_locked)
+ sdeb_meta_write_unlock(sip);
return ret;
}
@@ -4037,22 +4269,22 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
switch (prot_verify_write(scp, lba, num, ei_lba)) {
case 1: /* Guard tag error */
if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) {
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1);
return illegal_condition_result;
} else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1);
return check_condition_result;
}
break;
case 3: /* Reference tag error */
if (scp->prot_flags & SCSI_PROT_REF_CHECK) {
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3);
return illegal_condition_result;
} else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3);
return check_condition_result;
}
@@ -4060,13 +4292,16 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
}
- ret = do_device_access(sip, scp, 0, lba, num, true, group);
+ ret = do_device_access(sip, scp, 0, lba, num, group, true, false);
if (unlikely(scsi_debug_lbp()))
map_region(sip, lba, num);
+
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
- sdeb_write_unlock(sip);
+ if (meta_data_locked)
+ sdeb_meta_write_unlock(sip);
+
if (unlikely(-1 == ret))
return DID_ERROR << 16;
else if (unlikely(sdebug_verbose &&
@@ -4176,7 +4411,8 @@ static int resp_write_scat(struct scsi_cmnd *scp,
goto err_out;
}
- sdeb_write_lock(sip);
+ /* Just keep it simple and always lock for now */
+ sdeb_meta_write_lock(sip);
sg_off = lbdof_blen;
/* Spec says Buffer xfer Length field in number of LBs in dout */
cum_lb = 0;
@@ -4219,7 +4455,11 @@ static int resp_write_scat(struct scsi_cmnd *scp,
}
}
- ret = do_device_access(sip, scp, sg_off, lba, num, true, group);
+ /*
+ * Write ranges atomically to keep behaviour as close as
+ * possible to pre-atomic writes.
+ */
+ ret = do_device_access(sip, scp, sg_off, lba, num, group, true, true);
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
@@ -4258,7 +4498,7 @@ static int resp_write_scat(struct scsi_cmnd *scp,
}
ret = 0;
err_out_unlock:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
err_out:
kfree(lrdp);
return ret;
@@ -4277,14 +4517,16 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
scp->device->hostdata, true);
u8 *fs1p;
u8 *fsp;
+ bool meta_data_locked = false;
- sdeb_write_lock(sip);
+ if (sdebug_dev_is_zoned(devip) || scsi_debug_lbp()) {
+ sdeb_meta_write_lock(sip);
+ meta_data_locked = true;
+ }
ret = check_device_access_params(scp, lba, num, true);
- if (ret) {
- sdeb_write_unlock(sip);
- return ret;
- }
+ if (ret)
+ goto out;
if (unmap && scsi_debug_lbp()) {
unmap_region(sip, lba, num);
@@ -4295,6 +4537,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
/* if ndob then zero 1 logical block, else fetch 1 logical block */
fsp = sip->storep;
fs1p = fsp + (block * lb_size);
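+ /* Hold the data lock while the backing store is modified */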
+ sdeb_data_write_lock(sip);
if (ndob) {
memset(fs1p, 0, lb_size);
ret = 0;
@@ -4302,8 +4545,8 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
ret = fetch_to_dev_buffer(scp, fs1p, lb_size);
if (-1 == ret) {
- sdeb_write_unlock(sip);
- return DID_ERROR << 16;
+ ret = DID_ERROR << 16;
+ goto out;
} else if (sdebug_verbose && !ndob && (ret < lb_size))
sdev_printk(KERN_INFO, scp->device,
"%s: %s: lb size=%u, IO sent=%d bytes\n",
@@ -4320,10 +4563,12 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
+ sdeb_data_write_unlock(sip);
+ ret = 0;
out:
- sdeb_write_unlock(sip);
-
- return 0;
+ if (meta_data_locked)
+ sdeb_meta_write_unlock(sip);
+ return ret;
}
static int resp_write_same_10(struct scsi_cmnd *scp,
@@ -4466,25 +4711,30 @@ static int resp_comp_write(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_write_lock(sip);
-
ret = do_dout_fetch(scp, dnum, arr);
if (ret == -1) {
retval = DID_ERROR << 16;
- goto cleanup;
+ goto cleanup_free;
} else if (sdebug_verbose && (ret < (dnum * lb_size)))
sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb "
"indicated=%u, IO sent=%d bytes\n", my_name,
dnum * lb_size, ret);
+
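+ /*
+ * COMPARE AND WRITE touches both the data store and, via
+ * map_region(), the provisioning map, so take both locks.
+ */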
+ sdeb_data_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (!comp_write_worker(sip, lba, num, arr, false)) {
mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0);
retval = check_condition_result;
- goto cleanup;
+ goto cleanup_unlock;
}
+
+ /* Cover sip->map_storep (which map_region() sets) with the data lock */
if (scsi_debug_lbp())
map_region(sip, lba, num);
-cleanup:
- sdeb_write_unlock(sip);
+cleanup_unlock:
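+ /* Release in the reverse order of acquisition */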
+ sdeb_meta_write_unlock(sip);
+ sdeb_data_write_unlock(sip);
+cleanup_free:
kfree(arr);
return retval;
}
@@ -4528,7 +4778,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
desc = (void *)&buf[8];
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
for (i = 0 ; i < descriptors ; i++) {
unsigned long long lba = get_unaligned_be64(&desc[i].lba);
@@ -4544,7 +4794,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
ret = 0;
out:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
kfree(buf);
return ret;
@@ -4702,12 +4952,13 @@ static int resp_pre_fetch(struct scsi_cmnd *scp,
rest = block + nblks - sdebug_store_sectors;
/* Try to bring the PRE-FETCH range into CPU's cache */
- sdeb_read_lock(sip);
+ sdeb_data_read_lock(sip);
prefetch_range(fsp + (sdebug_sector_size * block),
(nblks - rest) * sdebug_sector_size);
if (rest)
prefetch_range(fsp, rest * sdebug_sector_size);
- sdeb_read_unlock(sip);
+
+ sdeb_data_read_unlock(sip);
fini:
if (cmd[1] & 0x2)
res = SDEG_RES_IMMED_MASK;
@@ -4866,7 +5117,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return check_condition_result;
}
/* Not changing store, so only need read access */
- sdeb_read_lock(sip);
+ sdeb_data_read_lock(sip);
ret = do_dout_fetch(scp, a_num, arr);
if (ret == -1) {
@@ -4888,7 +5139,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
goto cleanup;
}
cleanup:
- sdeb_read_unlock(sip);
+ sdeb_data_read_unlock(sip);
kfree(arr);
return ret;
}
@@ -4934,7 +5185,7 @@ static int resp_report_zones(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_read_lock(sip);
+ sdeb_meta_read_lock(sip);
desc = arr + 64;
for (lba = zs_lba; lba < sdebug_capacity;
@@ -5032,11 +5283,70 @@ static int resp_report_zones(struct scsi_cmnd *scp,
ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len));
fini:
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
kfree(arr);
return ret;
}
+static int resp_atomic_write(struct scsi_cmnd *scp,
+ struct sdebug_dev_info *devip)
+{
+ struct sdeb_store_info *sip;
+ u8 *cmd = scp->cmnd;
+ u16 boundary, len;
+ u64 lba, lba_tmp;
+ int ret;
+
+ if (!scsi_debug_atomic_write()) {
+ mk_sense_invalid_opcode(scp);
+ return check_condition_result;
+ }
+
+ sip = devip2sip(devip, true);
+
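+ /*
+ * WRITE ATOMIC(16) CDB: LBA in bytes 2-9, ATOMIC BOUNDARY in
+ * bytes 10-11, TRANSFER LENGTH in bytes 12-13.
+ */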
+ lba = get_unaligned_be64(cmd + 2);
+ boundary = get_unaligned_be16(cmd + 10);
+ len = get_unaligned_be16(cmd + 12);
+
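+ /* do_div() modifies the dividend in place, so test a copy of lba */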
+ lba_tmp = lba;
+ if (sdebug_atomic_wr_align &&
+ do_div(lba_tmp, sdebug_atomic_wr_align)) {
+ /* Does not meet alignment requirement */
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ return check_condition_result;
+ }
+
+ if (sdebug_atomic_wr_gran && len % sdebug_atomic_wr_gran) {
+ /* Does not meet alignment requirement */
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ return check_condition_result;
+ }
+
+ if (boundary > 0) {
+ if (boundary > sdebug_atomic_wr_max_bndry) {
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 10, -1);
+ return check_condition_result;
+ }
+
+ if (len > sdebug_atomic_wr_max_length_bndry) {
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1);
+ return check_condition_result;
+ }
+ } else {
+ if (len > sdebug_atomic_wr_max_length) {
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1);
+ return check_condition_result;
+ }
+ }
+
+ ret = do_device_access(sip, scp, 0, lba, len, 0, true, true);
+ if (unlikely(ret == -1))
+ return DID_ERROR << 16;
+ if (unlikely(ret != len * sdebug_sector_size))
+ return DID_ERROR << 16;
+ return 0;
+}
+
/* Logic transplanted from tcmu-runner, file_zbc.c */
static void zbc_open_all(struct sdebug_dev_info *devip)
{
@@ -5063,8 +5373,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
mk_sense_invalid_opcode(scp);
return check_condition_result;
}
-
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
/* Check if all closed zones can be open */
@@ -5113,7 +5422,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
zbc_open_zone(devip, zsp, true);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -5140,7 +5449,7 @@ static int resp_close_zone(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
zbc_close_all(devip);
@@ -5169,7 +5478,7 @@ static int resp_close_zone(struct scsi_cmnd *scp,
zbc_close_zone(devip, zsp);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -5212,7 +5521,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
zbc_finish_all(devip);
@@ -5241,7 +5550,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp,
zbc_finish_zone(devip, zsp, true);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -5292,7 +5601,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return check_condition_result;
}
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
zbc_rwp_all(devip);
@@ -5320,7 +5629,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
zbc_rwp_zone(devip, zsp);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -6284,6 +6593,7 @@ module_param_named(lbprz, sdebug_lbprz, int, S_IRUGO);
module_param_named(lbpu, sdebug_lbpu, int, S_IRUGO);
module_param_named(lbpws, sdebug_lbpws, int, S_IRUGO);
module_param_named(lbpws10, sdebug_lbpws10, int, S_IRUGO);
+module_param_named(atomic_wr, sdebug_atomic_wr, int, S_IRUGO);
module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO);
module_param_named(lun_format, sdebug_lun_am_i, int, S_IRUGO | S_IWUSR);
module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR);
@@ -6318,6 +6628,11 @@ module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO);
module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO);
module_param_named(unmap_max_blocks, sdebug_unmap_max_blocks, int, S_IRUGO);
module_param_named(unmap_max_desc, sdebug_unmap_max_desc, int, S_IRUGO);
+module_param_named(atomic_wr_max_length, sdebug_atomic_wr_max_length, int, S_IRUGO);
+module_param_named(atomic_wr_align, sdebug_atomic_wr_align, int, S_IRUGO);
+module_param_named(atomic_wr_gran, sdebug_atomic_wr_gran, int, S_IRUGO);
+module_param_named(atomic_wr_max_length_bndry, sdebug_atomic_wr_max_length_bndry, int, S_IRUGO);
+module_param_named(atomic_wr_max_bndry, sdebug_atomic_wr_max_bndry, int, S_IRUGO);
module_param_named(uuid_ctl, sdebug_uuid_ctl, int, S_IRUGO);
module_param_named(virtual_gb, sdebug_virtual_gb, int, S_IRUGO | S_IWUSR);
module_param_named(vpd_use_hostno, sdebug_vpd_use_hostno, int,
@@ -6361,6 +6676,7 @@ MODULE_PARM_DESC(lbprz,
MODULE_PARM_DESC(lbpu, "enable LBP, support UNMAP command (def=0)");
MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)");
+MODULE_PARM_DESC(atomic_wr, "enable ATOMIC WRITE support, support WRITE ATOMIC(16) (def=0)");
MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)");
MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method");
MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
@@ -6392,6 +6708,11 @@ MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)"
MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)");
MODULE_PARM_DESC(unmap_max_blocks, "max # of blocks can be unmapped in one cmd (def=0xffffffff)");
MODULE_PARM_DESC(unmap_max_desc, "max # of ranges that can be unmapped in one cmd (def=256)");
+MODULE_PARM_DESC(atomic_wr_max_length, "max # of blocks can be atomically written in one cmd (def=8192)");
+MODULE_PARM_DESC(atomic_wr_align, "minimum alignment of atomic write in blocks (def=2)");
+MODULE_PARM_DESC(atomic_wr_gran, "minimum granularity of atomic write in blocks (def=2)");
+MODULE_PARM_DESC(atomic_wr_max_length_bndry, "max # of blocks can be atomically written in one cmd with boundary set (def=8192)");
+MODULE_PARM_DESC(atomic_wr_max_bndry, "max # boundaries per atomic write (def=128)");
MODULE_PARM_DESC(uuid_ctl,
"1->use uuid for lu name, 0->don't, 2->all use same (def=0)");
MODULE_PARM_DESC(virtual_gb, "virtual gigabyte (GiB) size (def=0 -> use dev_size_mb)");
@@ -7563,6 +7884,7 @@ static int __init scsi_debug_init(void)
return -EINVAL;
}
}
+
xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
if (want_store) {
idx = sdebug_add_store();
@@ -7770,7 +8092,9 @@ static int sdebug_add_store(void)
map_region(sip, 0, 2);
}
- rwlock_init(&sip->macc_lck);
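+ /* The former macc_lck is split into data, meta and sector locks */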
+ rwlock_init(&sip->macc_data_lck);
+ rwlock_init(&sip->macc_meta_lck);
+ rwlock_init(&sip->macc_sector_lck);
return (int)n_idx;
err:
sdebug_erase_store((int)n_idx, sip);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ec39acc986d6..3958a6d14bf4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -631,8 +631,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
if (blk_update_request(req, error, bytes))
return true;
- // XXX:
- if (blk_queue_add_random(q))
+ if (q->limits.features & BLK_FEAT_ADD_RANDOM)
add_disk_randomness(req->q->disk);
WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
@@ -1140,9 +1139,9 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
*/
count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
- if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
+ if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) {
unsigned int pad_len =
- (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+ (rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
last_sg->length += pad_len;
cmd->extra_len += pad_len;
@@ -1987,7 +1986,7 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim)
shost->dma_alignment, dma_get_cache_alignment() - 1);
if (shost->no_highmem)
- lim->bounce = BLK_BOUNCE_HIGH;
+ lim->features |= BLK_FEAT_BOUNCE_HIGH;
dma_set_seg_boundary(dev, shost->dma_boundary);
dma_set_max_seg_size(dev, shost->max_segment_size);
diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c
index 41a950075913..3e47c4472a80 100644
--- a/drivers/scsi/scsi_trace.c
+++ b/drivers/scsi/scsi_trace.c
@@ -326,6 +326,26 @@ out:
}
static const char *
+scsi_trace_atomic_write16_out(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned int boundary_size;
+ unsigned int nr_blocks;
+ sector_t lba;
+
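+ /* Decode WRITE ATOMIC(16): LBA (bytes 2-9), boundary (10-11), length (12-13) */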
+ lba = get_unaligned_be64(&cdb[2]);
+ boundary_size = get_unaligned_be16(&cdb[10]);
+ nr_blocks = get_unaligned_be16(&cdb[12]);
+
+ trace_seq_printf(p, "lba=%llu txlen=%u boundary_size=%u",
+ lba, nr_blocks, boundary_size);
+
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *
scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
{
switch (SERVICE_ACTION32(cdb)) {
@@ -385,6 +405,8 @@ scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
return scsi_trace_zbc_in(p, cdb, len);
case ZBC_OUT:
return scsi_trace_zbc_out(p, cdb, len);
+ case WRITE_ATOMIC_16:
+ return scsi_trace_atomic_write16_out(p, cdb, len);
default:
return scsi_trace_misc(p, cdb, len);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 37dd6ead72a4..2e933fd1de70 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -63,6 +63,7 @@
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_device.h>
+#include <scsi/scsi_devinfo.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
@@ -101,12 +102,13 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC);
#define SD_MINORS 16
-static void sd_config_discard(struct scsi_disk *, unsigned int);
-static void sd_config_write_same(struct scsi_disk *);
+static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
+ unsigned int mode);
+static void sd_config_write_same(struct scsi_disk *sdkp,
+ struct queue_limits *lim);
static int sd_revalidate_disk(struct gendisk *);
static void sd_unlock_native_capacity(struct gendisk *disk);
static void sd_shutdown(struct device *);
-static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
static void scsi_disk_release(struct device *cdev);
static DEFINE_IDA(sd_index_ida);
@@ -119,17 +121,18 @@ static const char *sd_cache_types[] = {
"write back, no read (daft)"
};
-static void sd_set_flush_flag(struct scsi_disk *sdkp)
+static void sd_set_flush_flag(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
- bool wc = false, fua = false;
-
if (sdkp->WCE) {
- wc = true;
+ lim->features |= BLK_FEAT_WRITE_CACHE;
if (sdkp->DPOFUA)
- fua = true;
+ lim->features |= BLK_FEAT_FUA;
+ else
+ lim->features &= ~BLK_FEAT_FUA;
+ } else {
+ lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
}
-
- blk_queue_write_cache(sdkp->disk->queue, wc, fua);
}
static ssize_t
@@ -167,9 +170,18 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0;
if (sdkp->cache_override) {
+ struct queue_limits lim;
+
sdkp->WCE = wce;
sdkp->RCD = rcd;
- sd_set_flush_flag(sdkp);
+
+ lim = queue_limits_start_update(sdkp->disk->queue);
+ sd_set_flush_flag(sdkp, &lim);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ ret = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (ret)
+ return ret;
return count;
}
@@ -456,16 +468,12 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
- int mode;
+ struct queue_limits lim;
+ int mode, err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (sd_is_zoned(sdkp)) {
- sd_config_discard(sdkp, SD_LBP_DISABLE);
- return count;
- }
-
if (sdp->type != TYPE_DISK)
return -EINVAL;
@@ -473,8 +481,13 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
if (mode < 0)
return -EINVAL;
- sd_config_discard(sdkp, mode);
-
+ lim = queue_limits_start_update(sdkp->disk->queue);
+ sd_config_discard(sdkp, &lim, mode);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ err = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (err)
+ return err;
return count;
}
static DEVICE_ATTR_RW(provisioning_mode);
@@ -557,6 +570,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
+ struct queue_limits lim;
unsigned long max;
int err;
@@ -578,8 +592,13 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
sdkp->max_ws_blocks = max;
}
- sd_config_write_same(sdkp);
-
+ lim = queue_limits_start_update(sdkp->disk->queue);
+ sd_config_write_same(sdkp, &lim);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ err = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (err)
+ return err;
return count;
}
static DEVICE_ATTR_RW(max_write_same_blocks);
@@ -822,25 +841,28 @@ static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
return protect;
}
-static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
+static void sd_disable_discard(struct scsi_disk *sdkp)
+{
+ sdkp->provisioning_mode = SD_LBP_DISABLE;
+ blk_queue_disable_discard(sdkp->disk->queue);
+}
+
+static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
+ unsigned int mode)
{
- struct request_queue *q = sdkp->disk->queue;
unsigned int logical_block_size = sdkp->device->sector_size;
unsigned int max_blocks = 0;
- q->limits.discard_alignment =
- sdkp->unmap_alignment * logical_block_size;
- q->limits.discard_granularity =
- max(sdkp->physical_block_size,
- sdkp->unmap_granularity * logical_block_size);
+ lim->discard_alignment = sdkp->unmap_alignment * logical_block_size;
+ lim->discard_granularity = max(sdkp->physical_block_size,
+ sdkp->unmap_granularity * logical_block_size);
sdkp->provisioning_mode = mode;
switch (mode) {
case SD_LBP_FULL:
case SD_LBP_DISABLE:
- blk_queue_max_discard_sectors(q, 0);
- return;
+ break;
case SD_LBP_UNMAP:
max_blocks = min_not_zero(sdkp->max_unmap_blocks,
@@ -871,7 +893,8 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
break;
}
- blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
+ lim->max_hw_discard_sectors = max_blocks *
+ (logical_block_size >> SECTOR_SHIFT);
}
static void *sd_set_special_bvec(struct request *rq, unsigned int data_len)
@@ -917,6 +940,64 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
return scsi_alloc_sgtables(cmd);
}
+static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim)
+{
+ unsigned int logical_block_size = sdkp->device->sector_size,
+ physical_block_size_sectors, max_atomic, unit_min, unit_max;
+
+ if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) ||
+ sdkp->protection_type == T10_PI_TYPE2_PROTECTION)
+ return;
+
+ physical_block_size_sectors = sdkp->physical_block_size /
+ sdkp->device->sector_size;
+
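+ /* The block layer expects power-of-2 atomic write unit limits */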
+ unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ?
+ sdkp->atomic_granularity :
+ physical_block_size_sectors);
+
+ /*
+ * Only use atomic boundary when we have the odd scenario of
+ * sdkp->max_atomic == 0, which the spec does permit.
+ */
+ if (sdkp->max_atomic) {
+ max_atomic = sdkp->max_atomic;
+ unit_max = rounddown_pow_of_two(sdkp->max_atomic);
+ sdkp->use_atomic_write_boundary = 0;
+ } else {
+ max_atomic = sdkp->max_atomic_with_boundary;
+ unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary);
+ sdkp->use_atomic_write_boundary = 1;
+ }
+
+ /*
+ * Ensure compliance with granularity and alignment. For now, keep it
+ * simple and just don't support atomic writes for values mismatched
+ * with max_{boundary}atomic, physical block size, and
+ * atomic_granularity itself.
+ *
+ * We're really being distrustful by checking unit_max also...
+ */
+ if (sdkp->atomic_granularity > 1) {
+ if (unit_min > 1 && unit_min % sdkp->atomic_granularity)
+ return;
+ if (unit_max > 1 && unit_max % sdkp->atomic_granularity)
+ return;
+ }
+
+ if (sdkp->atomic_alignment > 1) {
+ if (unit_min > 1 && unit_min % sdkp->atomic_alignment)
+ return;
+ if (unit_max > 1 && unit_max % sdkp->atomic_alignment)
+ return;
+ }
+
+ lim->atomic_write_hw_max = max_atomic * logical_block_size;
+ lim->atomic_write_hw_boundary = 0;
+ lim->atomic_write_hw_unit_min = unit_min * logical_block_size;
+ lim->atomic_write_hw_unit_max = unit_max * logical_block_size;
+}
+
static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
bool unmap)
{
@@ -999,9 +1080,16 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
return sd_setup_write_same10_cmnd(cmd, false);
}
-static void sd_config_write_same(struct scsi_disk *sdkp)
+static void sd_disable_write_same(struct scsi_disk *sdkp)
+{
+ sdkp->device->no_write_same = 1;
+ sdkp->max_ws_blocks = 0;
+ blk_queue_disable_write_zeroes(sdkp->disk->queue);
+}
+
+static void sd_config_write_same(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
- struct request_queue *q = sdkp->disk->queue;
unsigned int logical_block_size = sdkp->device->sector_size;
if (sdkp->device->no_write_same) {
@@ -1055,8 +1143,8 @@ static void sd_config_write_same(struct scsi_disk *sdkp)
}
out:
- blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
- (logical_block_size >> 9));
+ lim->max_write_zeroes_sectors =
+ sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT);
}
static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
@@ -1208,6 +1296,26 @@ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd)
return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1;
}
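+/*
+ * Build a WRITE ATOMIC(16) CDB. When only a boundary-based limit was
+ * reported, the ATOMIC BOUNDARY field is set to the transfer length so
+ * that the whole request forms a single atomic unit.
+ */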
+static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd,
+ sector_t lba, unsigned int nr_blocks,
+ bool boundary, unsigned char flags)
+{
+ cmd->cmd_len = 16;
+ cmd->cmnd[0] = WRITE_ATOMIC_16;
+ cmd->cmnd[1] = flags;
+ put_unaligned_be64(lba, &cmd->cmnd[2]);
+ if (boundary)
+ put_unaligned_be16(nr_blocks, &cmd->cmnd[10]);
+ else
+ put_unaligned_be16(0, &cmd->cmnd[10]);
+ put_unaligned_be16(nr_blocks, &cmd->cmnd[12]);
+ cmd->cmnd[14] = 0;
+ cmd->cmnd[15] = 0;
+
+ return BLK_STS_OK;
+}
+
static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
{
struct request *rq = scsi_cmd_to_rq(cmd);
@@ -1273,6 +1381,10 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
+ } else if (rq->cmd_flags & REQ_ATOMIC && write) {
+ ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks,
+ sdkp->use_atomic_write_boundary,
+ protect | fua);
} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
@@ -2246,15 +2358,14 @@ static int sd_done(struct scsi_cmnd *SCpnt)
case 0x24: /* INVALID FIELD IN CDB */
switch (SCpnt->cmnd[0]) {
case UNMAP:
- sd_config_discard(sdkp, SD_LBP_DISABLE);
+ sd_disable_discard(sdkp);
break;
case WRITE_SAME_16:
case WRITE_SAME:
if (SCpnt->cmnd[1] & 8) { /* UNMAP */
- sd_config_discard(sdkp, SD_LBP_DISABLE);
+ sd_disable_discard(sdkp);
} else {
- sdkp->device->no_write_same = 1;
- sd_config_write_same(sdkp);
+ sd_disable_write_same(sdkp);
req->rq_flags |= RQF_QUIET;
}
break;
@@ -2266,7 +2377,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
}
out:
- if (sd_is_zoned(sdkp))
+ if (sdkp->device->type == TYPE_ZBC)
good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr);
SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
@@ -2460,11 +2571,13 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer
return 0;
}
-static void sd_config_protection(struct scsi_disk *sdkp)
+static void sd_config_protection(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
struct scsi_device *sdp = sdkp->device;
- sd_dif_config_host(sdkp);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ sd_dif_config_host(sdkp, lim);
if (!sdkp->protection_type)
return;
@@ -2513,7 +2626,7 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
#define READ_CAPACITY_RETRIES_ON_RESET 10
static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
- unsigned char *buffer)
+ struct queue_limits *lim, unsigned char *buffer)
{
unsigned char cmd[16];
struct scsi_sense_hdr sshdr;
@@ -2587,7 +2700,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
/* Lowest aligned logical block */
alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size;
- blk_queue_alignment_offset(sdp->request_queue, alignment);
+ lim->alignment_offset = alignment;
if (alignment && sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"physical block alignment offset: %u\n", alignment);
@@ -2598,7 +2711,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
if (buffer[14] & 0x40) /* LBPRZ */
sdkp->lbprz = 1;
- sd_config_discard(sdkp, SD_LBP_WS16);
+ sd_config_discard(sdkp, lim, SD_LBP_WS16);
}
sdkp->capacity = lba + 1;
@@ -2701,13 +2814,14 @@ static int sd_try_rc16_first(struct scsi_device *sdp)
* read disk capacity
*/
static void
-sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
+sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim,
+ unsigned char *buffer)
{
int sector_size;
struct scsi_device *sdp = sdkp->device;
if (sd_try_rc16_first(sdp)) {
- sector_size = read_capacity_16(sdkp, sdp, buffer);
+ sector_size = read_capacity_16(sdkp, sdp, lim, buffer);
if (sector_size == -EOVERFLOW)
goto got_data;
if (sector_size == -ENODEV)
@@ -2727,7 +2841,7 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
int old_sector_size = sector_size;
sd_printk(KERN_NOTICE, sdkp, "Very big device. "
"Trying to use READ CAPACITY(16).\n");
- sector_size = read_capacity_16(sdkp, sdp, buffer);
+ sector_size = read_capacity_16(sdkp, sdp, lim, buffer);
if (sector_size < 0) {
sd_printk(KERN_NOTICE, sdkp,
"Using 0xffffffff as device size\n");
@@ -2786,9 +2900,8 @@ got_data:
*/
sector_size = 512;
}
- blk_queue_logical_block_size(sdp->request_queue, sector_size);
- blk_queue_physical_block_size(sdp->request_queue,
- sdkp->physical_block_size);
+ lim->logical_block_size = sector_size;
+ lim->physical_block_size = sdkp->physical_block_size;
sdkp->device->sector_size = sector_size;
if (sdkp->capacity > 0xffffffff)
@@ -3118,6 +3231,9 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
struct scsi_mode_data data;
int res;
+ if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS)
+ return;
+
res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
/*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT,
sdkp->max_retries, &data, &sshdr);
@@ -3191,11 +3307,30 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
return;
}
-/**
- * sd_read_block_limits - Query disk device for preferred I/O sizes.
- * @sdkp: disk to query
+static unsigned int sd_discard_mode(struct scsi_disk *sdkp)
+{
+ if (!sdkp->lbpvpd) {
+ /* LBP VPD page not provided */
+ if (sdkp->max_unmap_blocks)
+ return SD_LBP_UNMAP;
+ return SD_LBP_WS16;
+ }
+
+ /* LBP VPD page tells us what to use */
+ if (sdkp->lbpu && sdkp->max_unmap_blocks)
+ return SD_LBP_UNMAP;
+ if (sdkp->lbpws)
+ return SD_LBP_WS16;
+ if (sdkp->lbpws10)
+ return SD_LBP_WS10;
+ return SD_LBP_DISABLE;
+}
+
+/*
+ * Query disk device for preferred I/O sizes.
*/
-static void sd_read_block_limits(struct scsi_disk *sdkp)
+static void sd_read_block_limits(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
struct scsi_vpd *vpd;
@@ -3215,7 +3350,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]);
if (!sdkp->lbpme)
- goto out;
+ goto config_atomic;
lba_count = get_unaligned_be32(&vpd->data[20]);
desc_count = get_unaligned_be32(&vpd->data[24]);
@@ -3229,23 +3364,16 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
sdkp->unmap_alignment =
get_unaligned_be32(&vpd->data[32]) & ~(1 << 31);
- if (!sdkp->lbpvpd) { /* LBP VPD page not provided */
-
- if (sdkp->max_unmap_blocks)
- sd_config_discard(sdkp, SD_LBP_UNMAP);
- else
- sd_config_discard(sdkp, SD_LBP_WS16);
-
- } else { /* LBP VPD page tells us what to use */
- if (sdkp->lbpu && sdkp->max_unmap_blocks)
- sd_config_discard(sdkp, SD_LBP_UNMAP);
- else if (sdkp->lbpws)
- sd_config_discard(sdkp, SD_LBP_WS16);
- else if (sdkp->lbpws10)
- sd_config_discard(sdkp, SD_LBP_WS10);
- else
- sd_config_discard(sdkp, SD_LBP_DISABLE);
- }
+ sd_config_discard(sdkp, lim, sd_discard_mode(sdkp));
+
+config_atomic:
+ sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]);
+ sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]);
+ sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]);
+ sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]);
+ sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]);
+
+ sd_config_atomic(sdkp, lim);
}
out:
@@ -3264,13 +3392,10 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp)
rcu_read_unlock();
}
-/**
- * sd_read_block_characteristics - Query block dev. characteristics
- * @sdkp: disk to query
- */
-static void sd_read_block_characteristics(struct scsi_disk *sdkp)
+/* Query block device characteristics */
+static void sd_read_block_characteristics(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
- struct request_queue *q = sdkp->disk->queue;
struct scsi_vpd *vpd;
u16 rot;
@@ -3286,37 +3411,13 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
sdkp->zoned = (vpd->data[8] >> 4) & 3;
rcu_read_unlock();
- if (rot == 1) {
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
- }
-
-
-#ifdef CONFIG_BLK_DEV_ZONED /* sd_probe rejects ZBD devices early otherwise */
- if (sdkp->device->type == TYPE_ZBC) {
- /*
- * Host-managed.
- */
- disk_set_zoned(sdkp->disk);
-
- /*
- * Per ZBC and ZAC specifications, writes in sequential write
- * required zones of host-managed devices must be aligned to
- * the device physical block size.
- */
- blk_queue_zone_write_granularity(q, sdkp->physical_block_size);
- } else {
- /*
- * Host-aware devices are treated as conventional.
- */
- WARN_ON_ONCE(blk_queue_is_zoned(q));
- }
-#endif /* CONFIG_BLK_DEV_ZONED */
+ if (rot == 1)
+ lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);
if (!sdkp->first_scan)
return;
- if (blk_queue_is_zoned(q))
+ if (sdkp->device->type == TYPE_ZBC)
sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n");
else if (sdkp->zoned == 1)
sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n");
@@ -3597,10 +3698,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
{
struct scsi_disk *sdkp = scsi_disk(disk);
struct scsi_device *sdp = sdkp->device;
- struct request_queue *q = sdkp->disk->queue;
sector_t old_capacity = sdkp->capacity;
+ struct queue_limits lim;
unsigned char *buffer;
- unsigned int dev_max, rw_max;
+ unsigned int dev_max;
+ int err;
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
"sd_revalidate_disk\n"));
@@ -3621,12 +3723,14 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_spinup_disk(sdkp);
+ lim = queue_limits_start_update(sdkp->disk->queue);
+
/*
* Without media there is no reason to ask; moreover, some devices
* react badly if we do.
*/
if (sdkp->media_present) {
- sd_read_capacity(sdkp, buffer);
+ sd_read_capacity(sdkp, &lim, buffer);
/*
* Some USB/UAS devices return generic values for mode pages
* until the media has been accessed. Trigger a READ operation
@@ -3640,15 +3744,14 @@ static int sd_revalidate_disk(struct gendisk *disk)
* cause this to be updated correctly and any device which
* doesn't support it should be treated as rotational.
*/
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
+ lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);
if (scsi_device_supports_vpd(sdp)) {
sd_read_block_provisioning(sdkp);
- sd_read_block_limits(sdkp);
+ sd_read_block_limits(sdkp, &lim);
sd_read_block_limits_ext(sdkp);
- sd_read_block_characteristics(sdkp);
- sd_zbc_read_zones(sdkp, buffer);
+ sd_read_block_characteristics(sdkp, &lim);
+ sd_zbc_read_zones(sdkp, &lim, buffer);
sd_read_cpr(sdkp);
}
@@ -3660,64 +3763,50 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_read_app_tag_own(sdkp, buffer);
sd_read_write_same(sdkp, buffer);
sd_read_security(sdkp, buffer);
- sd_config_protection(sdkp);
+ sd_config_protection(sdkp, &lim);
}
/*
* We now have all cache related info, determine how we deal
* with flush requests.
*/
- sd_set_flush_flag(sdkp);
+ sd_set_flush_flag(sdkp, &lim);
/* Initial block count limit based on CDB TRANSFER LENGTH field size. */
dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
/* Some devices report a maximum block count for READ/WRITE requests. */
dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
- q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
+ lim.max_dev_sectors = logical_to_sectors(sdp, dev_max);
if (sd_validate_min_xfer_size(sdkp))
- blk_queue_io_min(sdkp->disk->queue,
- logical_to_bytes(sdp, sdkp->min_xfer_blocks));
+ lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks);
else
- blk_queue_io_min(sdkp->disk->queue, 0);
-
- if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
- q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
- rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
- } else {
- q->limits.io_opt = 0;
- rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
- (sector_t)BLK_DEF_MAX_SECTORS_CAP);
- }
+ lim.io_min = 0;
/*
* Limit default to SCSI host optimal sector limit if set. There may be
* an impact on performance for when the size of a request exceeds this
* host limit.
*/
- rw_max = min_not_zero(rw_max, sdp->host->opt_sectors);
-
- /* Do not exceed controller limit */
- rw_max = min(rw_max, queue_max_hw_sectors(q));
-
- /*
- * Only update max_sectors if previously unset or if the current value
- * exceeds the capabilities of the hardware.
- */
- if (sdkp->first_scan ||
- q->limits.max_sectors > q->limits.max_dev_sectors ||
- q->limits.max_sectors > q->limits.max_hw_sectors) {
- q->limits.max_sectors = rw_max;
- q->limits.max_user_sectors = rw_max;
+ lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT;
+ if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
+ lim.io_opt = min_not_zero(lim.io_opt,
+ logical_to_bytes(sdp, sdkp->opt_xfer_blocks));
}
sdkp->first_scan = 0;
set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity));
- sd_config_write_same(sdkp);
+ sd_config_write_same(sdkp, &lim);
kfree(buffer);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ err = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (err)
+ return err;
+
/*
* For a zoned drive, revalidating the zones can be done only once
* the gendisk capacity is set. So if this fails, set back the gendisk
@@ -4115,8 +4204,6 @@ static int sd_resume(struct device *dev)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
-
if (opal_unlock_from_suspend(sdkp->opal_dev)) {
sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
return -EIO;
@@ -4133,12 +4220,13 @@ static int sd_resume_common(struct device *dev, bool runtime)
if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
return 0;
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
if (!sd_do_start_stop(sdkp->device, runtime)) {
sdkp->suspended = false;
return 0;
}
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
sd_resume(dev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 49dd600bfa48..36382eca941c 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -115,6 +115,13 @@ struct scsi_disk {
u32 max_unmap_blocks;
u32 unmap_granularity;
u32 unmap_alignment;
+
+ u32 max_atomic;
+ u32 atomic_alignment;
+ u32 atomic_granularity;
+ u32 max_atomic_with_boundary;
+ u32 max_atomic_boundary;
+
u32 index;
unsigned int physical_block_size;
unsigned int max_medium_access_timeouts;
@@ -148,6 +155,7 @@ struct scsi_disk {
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
unsigned rscs : 1; /* reduced stream control support */
+ unsigned use_atomic_write_boundary : 1;
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
@@ -220,26 +228,12 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec
return sector >> (ilog2(sdev->sector_size) - 9);
}
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-
-extern void sd_dif_config_host(struct scsi_disk *);
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline void sd_dif_config_host(struct scsi_disk *disk)
-{
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline int sd_is_zoned(struct scsi_disk *sdkp)
-{
- return sdkp->zoned == 1 || sdkp->device->type == TYPE_ZBC;
-}
+void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim);
#ifdef CONFIG_BLK_DEV_ZONED
-int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]);
+int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
+ u8 buf[SD_BUF_SIZE]);
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
unsigned char op, bool all);
@@ -250,7 +244,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
#else /* CONFIG_BLK_DEV_ZONED */
-static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
+static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
+ struct queue_limits *lim, u8 buf[SD_BUF_SIZE])
{
return 0;
}
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 1df847b5f747..ae6ce6f5d622 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -24,14 +24,15 @@
/*
* Configure exchange of protection information between OS and HBA.
*/
-void sd_dif_config_host(struct scsi_disk *sdkp)
+void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim)
{
struct scsi_device *sdp = sdkp->device;
- struct gendisk *disk = sdkp->disk;
u8 type = sdkp->protection_type;
- struct blk_integrity bi;
+ struct blk_integrity *bi = &lim->integrity;
int dif, dix;
+ memset(bi, 0, sizeof(*bi));
+
dif = scsi_host_dif_capable(sdp->host, type);
dix = scsi_host_dix_capable(sdp->host, type);
@@ -39,45 +40,33 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
dif = 0; dix = 1;
}
- if (!dix) {
- blk_integrity_unregister(disk);
+ if (!dix)
return;
- }
-
- memset(&bi, 0, sizeof(bi));
/* Enable DMA of protection information */
- if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) {
- if (type == T10_PI_TYPE3_PROTECTION)
- bi.profile = &t10_pi_type3_ip;
- else
- bi.profile = &t10_pi_type1_ip;
+ if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP)
+ bi->csum_type = BLK_INTEGRITY_CSUM_IP;
+ else
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC;
- bi.flags |= BLK_INTEGRITY_IP_CHECKSUM;
- } else
- if (type == T10_PI_TYPE3_PROTECTION)
- bi.profile = &t10_pi_type3_crc;
- else
- bi.profile = &t10_pi_type1_crc;
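+ /* Type 3 protection has no meaningful reference tag to verify */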
+ if (type != T10_PI_TYPE3_PROTECTION)
+ bi->flags |= BLK_INTEGRITY_REF_TAG;
- bi.tuple_size = sizeof(struct t10_pi_tuple);
+ bi->tuple_size = sizeof(struct t10_pi_tuple);
if (dif && type) {
- bi.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
if (!sdkp->ATO)
- goto out;
+ return;
if (type == T10_PI_TYPE3_PROTECTION)
- bi.tag_size = sizeof(u16) + sizeof(u32);
+ bi->tag_size = sizeof(u16) + sizeof(u32);
else
- bi.tag_size = sizeof(u16);
+ bi->tag_size = sizeof(u16);
}
sd_first_printk(KERN_NOTICE, sdkp,
"Enabling DIX %s, application tag size %u bytes\n",
- bi.profile->name, bi.tag_size);
-out:
- blk_integrity_register(disk, &bi);
+ blk_integrity_profile_name(bi), bi->tag_size);
}
-
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 806036e48abe..c8b9654d30f0 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -232,7 +232,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
int zone_idx = 0;
int ret;
- if (!sd_is_zoned(sdkp))
+ if (sdkp->device->type != TYPE_ZBC)
/* Not a zoned device */
return -EOPNOTSUPP;
@@ -300,7 +300,7 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
sector_t sector = blk_rq_pos(rq);
- if (!sd_is_zoned(sdkp))
+ if (sdkp->device->type != TYPE_ZBC)
/* Not a zoned device */
return BLK_STS_IOERR;
@@ -521,7 +521,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
static void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
- if (!sd_is_zoned(sdkp) || !sdkp->capacity)
+ if (sdkp->device->type != TYPE_ZBC || !sdkp->capacity)
return;
if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
@@ -565,12 +565,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
sdkp->zone_info.zone_blocks = zone_blocks;
sdkp->zone_info.nr_zones = nr_zones;
- blk_queue_chunk_sectors(q,
- logical_to_sectors(sdkp->device, zone_blocks));
-
- /* Enable block layer zone append emulation */
- blk_queue_max_zone_append_sectors(q, 0);
-
flags = memalloc_noio_save();
ret = blk_revalidate_disk_zones(disk);
memalloc_noio_restore(flags);
@@ -588,27 +582,31 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
/**
* sd_zbc_read_zones - Read zone information and update the request queue
* @sdkp: SCSI disk pointer.
+ * @lim: queue limits to read into
* @buf: 512 byte buffer used for storing SCSI command output.
*
* Read zone information and update the request queue zone characteristics and
* also the zoned device information in *sdkp. Called by sd_revalidate_disk()
* before the gendisk capacity has been set.
*/
-int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
+int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
+ u8 buf[SD_BUF_SIZE])
{
- struct gendisk *disk = sdkp->disk;
- struct request_queue *q = disk->queue;
unsigned int nr_zones;
u32 zone_blocks = 0;
int ret;
- if (!sd_is_zoned(sdkp)) {
- /*
- * Device managed or normal SCSI disk, no special handling
- * required.
- */
+ if (sdkp->device->type != TYPE_ZBC)
return 0;
- }
+
+ lim->features |= BLK_FEAT_ZONED;
+
+ /*
+ * Per ZBC and ZAC specifications, writes in sequential write required
+ * zones of host-managed devices must be aligned to the device physical
+ * block size.
+ */
+ lim->zone_write_granularity = sdkp->physical_block_size;
/* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
sdkp->device->use_16_for_rw = 1;
@@ -625,18 +623,20 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
if (ret != 0)
goto err;
- /* The drive satisfies the kernel restrictions: set it up */
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- if (sdkp->zones_max_open == U32_MAX)
- disk_set_max_open_zones(disk, 0);
- else
- disk_set_max_open_zones(disk, sdkp->zones_max_open);
- disk_set_max_active_zones(disk, 0);
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
-
sdkp->early_zone_info.nr_zones = nr_zones;
sdkp->early_zone_info.zone_blocks = zone_blocks;
+ /* The drive satisfies the kernel restrictions: set it up */
+ if (sdkp->zones_max_open == U32_MAX)
+ lim->max_open_zones = 0;
+ else
+ lim->max_open_zones = sdkp->zones_max_open;
+ lim->max_active_zones = 0;
+ lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);
+ /* Enable block layer zone append emulation */
+ lim->max_zone_append_sectors = 0;
+
return 0;
err:
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 7ab000942b97..3f491019103e 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -111,7 +111,7 @@ static struct lock_class_key sr_bio_compl_lkclass;
static int sr_open(struct cdrom_device_info *, int);
static void sr_release(struct cdrom_device_info *);
-static void get_sectorsize(struct scsi_cd *);
+static int get_sectorsize(struct scsi_cd *);
static int get_capabilities(struct scsi_cd *);
static unsigned int sr_check_events(struct cdrom_device_info *cdi,
@@ -473,15 +473,15 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt)
return BLK_STS_IOERR;
}
-static void sr_revalidate_disk(struct scsi_cd *cd)
+static int sr_revalidate_disk(struct scsi_cd *cd)
{
struct scsi_sense_hdr sshdr;
/* if the unit is not ready, nothing more to do */
if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
- return;
+ return 0;
sr_cd_check(&cd->cdi);
- get_sectorsize(cd);
+ return get_sectorsize(cd);
}
static int sr_block_open(struct gendisk *disk, blk_mode_t mode)
@@ -494,13 +494,16 @@ static int sr_block_open(struct gendisk *disk, blk_mode_t mode)
return -ENXIO;
scsi_autopm_get_device(sdev);
- if (disk_check_media_change(disk))
- sr_revalidate_disk(cd);
+ if (disk_check_media_change(disk)) {
+ ret = sr_revalidate_disk(cd);
+ if (ret)
+ goto out;
+ }
mutex_lock(&cd->lock);
ret = cdrom_open(&cd->cdi, mode);
mutex_unlock(&cd->lock);
-
+out:
scsi_autopm_put_device(sdev);
if (ret)
scsi_device_put(cd->device);
@@ -685,7 +688,9 @@ static int sr_probe(struct device *dev)
blk_pm_runtime_init(sdev->request_queue, dev);
dev_set_drvdata(dev, cd);
- sr_revalidate_disk(cd);
+ error = sr_revalidate_disk(cd);
+ if (error)
+ goto unregister_cdrom;
error = device_add_disk(&sdev->sdev_gendev, disk, NULL);
if (error)
@@ -714,13 +719,14 @@ fail:
}
-static void get_sectorsize(struct scsi_cd *cd)
+static int get_sectorsize(struct scsi_cd *cd)
{
+ struct request_queue *q = cd->device->request_queue;
static const u8 cmd[10] = { READ_CAPACITY };
unsigned char buffer[8] = { };
- int the_result;
+ struct queue_limits lim;
+ int err;
int sector_size;
- struct request_queue *queue;
struct scsi_failure failure_defs[] = {
{
.result = SCMD_FAILURE_RESULT_ANY,
@@ -736,10 +742,10 @@ static void get_sectorsize(struct scsi_cd *cd)
};
/* Do the command and wait.. */
- the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer,
+ err = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer,
sizeof(buffer), SR_TIMEOUT, MAX_RETRIES,
&exec_args);
- if (the_result) {
+ if (err) {
cd->capacity = 0x1fffff;
sector_size = 2048; /* A guess, just in case */
} else {
@@ -789,10 +795,12 @@ static void get_sectorsize(struct scsi_cd *cd)
set_capacity(cd->disk, cd->capacity);
}
- queue = cd->device->request_queue;
- blk_queue_logical_block_size(queue, sector_size);
-
- return;
+ lim = queue_limits_start_update(q);
+ lim.logical_block_size = sector_size;
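+ /* Freeze the queue so no I/O is in flight while the limits change */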
+ blk_mq_freeze_queue(q);
+ err = queue_limits_commit_update(q, &lim);
+ blk_mq_unfreeze_queue(q);
+ return err;
}
static int get_capabilities(struct scsi_cd *cd)
diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig
index e6ba3573a772..f3f869639588 100644
--- a/drivers/soc/litex/Kconfig
+++ b/drivers/soc/litex/Kconfig
@@ -7,7 +7,7 @@ config LITEX
config LITEX_SOC_CONTROLLER
tristate "Enable LiteX SoC Controller driver"
- depends on OF || COMPILE_TEST
+ depends on OF
depends on HAS_IOMEM
select LITEX
help
diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c
index 10813299aa10..72c44119dd54 100644
--- a/drivers/soc/litex/litex_soc_ctrl.c
+++ b/drivers/soc/litex/litex_soc_ctrl.c
@@ -82,13 +82,11 @@ static int litex_reset_handler(struct notifier_block *this, unsigned long mode,
return NOTIFY_DONE;
}
-#ifdef CONFIG_OF
static const struct of_device_id litex_soc_ctrl_of_match[] = {
{.compatible = "litex,soc-controller"},
{},
};
MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match);
-#endif /* CONFIG_OF */
static int litex_soc_ctrl_probe(struct platform_device *pdev)
{
@@ -130,7 +128,7 @@ static void litex_soc_ctrl_remove(struct platform_device *pdev)
static struct platform_driver litex_soc_ctrl_driver = {
.driver = {
.name = "litex-soc-controller",
- .of_match_table = of_match_ptr(litex_soc_ctrl_of_match)
+ .of_match_table = litex_soc_ctrl_of_match,
},
.probe = litex_soc_ctrl_probe,
.remove_new = litex_soc_ctrl_remove,
diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c
index 40fb09d69014..65279243072c 100644
--- a/drivers/soc/qcom/pmic_glink.c
+++ b/drivers/soc/qcom/pmic_glink.c
@@ -348,11 +348,15 @@ static void pmic_glink_remove(struct platform_device *pdev)
mutex_unlock(&__pmic_glink_lock);
}
+static const unsigned long pmic_glink_sc8280xp_client_mask = BIT(PMIC_GLINK_CLIENT_BATT) |
+ BIT(PMIC_GLINK_CLIENT_ALTMODE);
+
static const unsigned long pmic_glink_sm8450_client_mask = BIT(PMIC_GLINK_CLIENT_BATT) |
BIT(PMIC_GLINK_CLIENT_ALTMODE) |
BIT(PMIC_GLINK_CLIENT_UCSI);
static const struct of_device_id pmic_glink_of_match[] = {
+ { .compatible = "qcom,sc8280xp-pmic-glink", .data = &pmic_glink_sc8280xp_client_mask },
{ .compatible = "qcom,pmic-glink", .data = &pmic_glink_sm8450_client_mask },
{}
};
diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c
index b6bfd6729df3..d27667283846 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra.c
@@ -127,8 +127,8 @@ static void tegra_fuse_print_sku_info(struct tegra_sku_info *tegra_sku_info)
static int tegra_fuse_add_lookups(struct tegra_fuse *fuse)
{
- fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups),
- fuse->soc->num_lookups, GFP_KERNEL);
+ fuse->lookups = kmemdup_array(fuse->soc->lookups, fuse->soc->num_lookups,
+ sizeof(*fuse->lookups), GFP_KERNEL);
if (!fuse->lookups)
return -ENOMEM;
diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c
index 20d94bcfc9b4..795e223f7e5c 100644
--- a/drivers/soundwire/amd_manager.c
+++ b/drivers/soundwire/amd_manager.c
@@ -571,6 +571,9 @@ static int sdw_master_read_amd_prop(struct sdw_bus *bus)
amd_manager->wake_en_mask = wake_en_mask;
fwnode_property_read_u32(link, "amd-sdw-power-mode", &power_mode_mask);
amd_manager->power_mode_mask = power_mode_mask;
+
+ fwnode_handle_put(link);
+
return 0;
}
diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c
index 17cf27e6ea73..18517121cc89 100644
--- a/drivers/soundwire/intel_auxdevice.c
+++ b/drivers/soundwire/intel_auxdevice.c
@@ -155,8 +155,10 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus)
SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY;
intel_prop = devm_kzalloc(bus->dev, sizeof(*intel_prop), GFP_KERNEL);
- if (!intel_prop)
+ if (!intel_prop) {
+ fwnode_handle_put(link);
return -ENOMEM;
+ }
/* initialize with hardware defaults, in case the properties are not found */
intel_prop->doaise = 0x1;
@@ -184,6 +186,8 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus)
intel_prop->dodse,
intel_prop->dods);
+ fwnode_handle_put(link);
+
return 0;
}
diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c
index 55a9c51c84c1..e5d9df26d4dc 100644
--- a/drivers/soundwire/mipi_disco.c
+++ b/drivers/soundwire/mipi_disco.c
@@ -66,8 +66,10 @@ int sdw_master_read_prop(struct sdw_bus *bus)
prop->clk_freq = devm_kcalloc(bus->dev, prop->num_clk_freq,
sizeof(*prop->clk_freq),
GFP_KERNEL);
- if (!prop->clk_freq)
+ if (!prop->clk_freq) {
+ fwnode_handle_put(link);
return -ENOMEM;
+ }
fwnode_property_read_u32_array(link,
"mipi-sdw-clock-frequencies-supported",
@@ -92,8 +94,10 @@ int sdw_master_read_prop(struct sdw_bus *bus)
prop->clk_gears = devm_kcalloc(bus->dev, prop->num_clk_gears,
sizeof(*prop->clk_gears),
GFP_KERNEL);
- if (!prop->clk_gears)
+ if (!prop->clk_gears) {
+ fwnode_handle_put(link);
return -ENOMEM;
+ }
fwnode_property_read_u32_array(link,
"mipi-sdw-supported-clock-gears",
@@ -116,6 +120,8 @@ int sdw_master_read_prop(struct sdw_bus *bus)
fwnode_property_read_u32(link, "mipi-sdw-command-error-threshold",
&prop->err_threshold);
+ fwnode_handle_put(link);
+
return 0;
}
EXPORT_SYMBOL(sdw_master_read_prop);
@@ -197,8 +203,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
dpn[i].num_words,
sizeof(*dpn[i].words),
GFP_KERNEL);
- if (!dpn[i].words)
+ if (!dpn[i].words) {
+ fwnode_handle_put(node);
return -ENOMEM;
+ }
fwnode_property_read_u32_array(node,
"mipi-sdw-port-wordlength-configs",
@@ -236,8 +244,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
dpn[i].num_channels,
sizeof(*dpn[i].channels),
GFP_KERNEL);
- if (!dpn[i].channels)
+ if (!dpn[i].channels) {
+ fwnode_handle_put(node);
return -ENOMEM;
+ }
fwnode_property_read_u32_array(node,
"mipi-sdw-channel-number-list",
@@ -251,8 +261,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
dpn[i].num_ch_combinations,
sizeof(*dpn[i].ch_combinations),
GFP_KERNEL);
- if (!dpn[i].ch_combinations)
+ if (!dpn[i].ch_combinations) {
+ fwnode_handle_put(node);
return -ENOMEM;
+ }
fwnode_property_read_u32_array(node,
"mipi-sdw-channel-combination-list",
@@ -274,6 +286,8 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave,
/* TODO: Read audio mode */
+ fwnode_handle_put(node);
+
i++;
}
@@ -348,10 +362,14 @@ int sdw_slave_read_prop(struct sdw_slave *slave)
prop->dp0_prop = devm_kzalloc(&slave->dev,
sizeof(*prop->dp0_prop),
GFP_KERNEL);
- if (!prop->dp0_prop)
+ if (!prop->dp0_prop) {
+ fwnode_handle_put(port);
return -ENOMEM;
+ }
sdw_slave_read_dp0(slave, port, prop->dp0_prop);
+
+ fwnode_handle_put(port);
}
/*
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index e358ac5b4509..96a524772549 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -164,16 +164,20 @@ static void spi_engine_gen_xfer(struct spi_engine_program *p, bool dry,
}
static void spi_engine_gen_sleep(struct spi_engine_program *p, bool dry,
- int delay_ns, u32 sclk_hz)
+ int delay_ns, int inst_ns, u32 sclk_hz)
{
unsigned int t;
- /* negative delay indicates error, e.g. from spi_delay_to_ns() */
- if (delay_ns <= 0)
+ /*
+ * Negative delay indicates error, e.g. from spi_delay_to_ns(). And if
+ * delay is less that the instruction execution time, there is no need
+ * for an extra sleep instruction since the instruction execution time
+ * will already cover the required delay.
+ */
+ if (delay_ns < 0 || delay_ns <= inst_ns)
return;
- /* rounding down since executing the instruction adds a couple of ticks delay */
- t = DIV_ROUND_DOWN_ULL((u64)delay_ns * sclk_hz, NSEC_PER_SEC);
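+ /* Round up so the generated sleep never undershoots the request */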
+ t = DIV_ROUND_UP_ULL((u64)(delay_ns - inst_ns) * sclk_hz, NSEC_PER_SEC);
while (t) {
unsigned int n = min(t, 256U);
@@ -220,10 +224,16 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
struct spi_device *spi = msg->spi;
struct spi_controller *host = spi->controller;
struct spi_transfer *xfer;
- int clk_div, new_clk_div;
+ int clk_div, new_clk_div, inst_ns;
bool keep_cs = false;
u8 bits_per_word = 0;
+ /*
+ * Take into account instruction execution time for more accurate sleep
+ * times, especially when the delay is small.
+ */
+ inst_ns = DIV_ROUND_UP(NSEC_PER_SEC, host->max_speed_hz);
+
clk_div = 1;
spi_engine_program_add_cmd(p, dry,
@@ -252,7 +262,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
spi_engine_gen_xfer(p, dry, xfer);
spi_engine_gen_sleep(p, dry, spi_delay_to_ns(&xfer->delay, xfer),
- xfer->effective_speed_hz);
+ inst_ns, xfer->effective_speed_hz);
if (xfer->cs_change) {
if (list_is_last(&xfer->transfer_list, &msg->transfers)) {
@@ -262,7 +272,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
spi_engine_gen_cs(p, dry, spi, false);
spi_engine_gen_sleep(p, dry, spi_delay_to_ns(
- &xfer->cs_change_delay, xfer),
+ &xfer->cs_change_delay, xfer), inst_ns,
xfer->effective_speed_hz);
if (!list_next_entry(xfer, transfer_list)->cs_off)
diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c
index 9d747ea69926..8b618ef0f711 100644
--- a/drivers/spi/spi-cs42l43.c
+++ b/drivers/spi/spi-cs42l43.c
@@ -26,7 +26,7 @@
#include <linux/units.h>
#define CS42L43_FIFO_SIZE 16
-#define CS42L43_SPI_ROOT_HZ (40 * HZ_PER_MHZ)
+#define CS42L43_SPI_ROOT_HZ 49152000
#define CS42L43_SPI_MAX_LENGTH 65532
enum cs42l43_spi_cmd {
@@ -54,7 +54,7 @@ static const struct software_node ampr = {
static struct spi_board_info ampl_info = {
.modalias = "cs35l56",
- .max_speed_hz = 20 * HZ_PER_MHZ,
+ .max_speed_hz = 11 * HZ_PER_MHZ,
.chip_select = 0,
.mode = SPI_MODE_0,
.swnode = &ampl,
@@ -62,7 +62,7 @@ static struct spi_board_info ampl_info = {
static struct spi_board_info ampr_info = {
.modalias = "cs35l56",
- .max_speed_hz = 20 * HZ_PER_MHZ,
+ .max_speed_hz = 11 * HZ_PER_MHZ,
.chip_select = 1,
.mode = SPI_MODE_0,
.swnode = &ampr,
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index be3998104bfb..f7e8b5efa50e 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -984,6 +984,9 @@ static int davinci_spi_probe(struct platform_device *pdev)
return ret;
free_dma:
+ /* This bit needs to be cleared to disable dspi->clk */
+ clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
+
if (dspi->dma_rx) {
dma_release_channel(dspi->dma_rx);
dma_release_channel(dspi->dma_tx);
@@ -1013,6 +1016,9 @@ static void davinci_spi_remove(struct platform_device *pdev)
spi_bitbang_stop(&dspi->bitbang);
+ /* This bit needs to be cleared to disable dspi->clk */
+ clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
+
if (dspi->dma_rx) {
dma_release_channel(dspi->dma_rx);
dma_release_channel(dspi->dma_tx);
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index f4006c82f867..1439883326cf 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -660,18 +660,8 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
ctrl |= (spi_imx->target_burst * 8 - 1)
<< MX51_ECSPI_CTRL_BL_OFFSET;
else {
- if (spi_imx->usedma) {
- ctrl |= (spi_imx->bits_per_word - 1)
- << MX51_ECSPI_CTRL_BL_OFFSET;
- } else {
- if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST)
- ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1)
- << MX51_ECSPI_CTRL_BL_OFFSET;
- else
- ctrl |= (spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word,
- BITS_PER_BYTE) * spi_imx->bits_per_word - 1)
- << MX51_ECSPI_CTRL_BL_OFFSET;
- }
+ ctrl |= (spi_imx->bits_per_word - 1)
+ << MX51_ECSPI_CTRL_BL_OFFSET;
}
/* set clock speed */
@@ -1060,7 +1050,7 @@ static struct spi_imx_devtype_data imx35_cspi_devtype_data = {
.rx_available = mx31_rx_available,
.reset = mx31_reset,
.fifo_size = 8,
- .has_dmamode = true,
+ .has_dmamode = false,
.dynamic_burst = false,
.has_targetmode = false,
.devtype = IMX35_CSPI,
diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c
index 5d72e3d59df8..c02c4204442f 100644
--- a/drivers/spi/spi-mux.c
+++ b/drivers/spi/spi-mux.c
@@ -158,12 +158,14 @@ static int spi_mux_probe(struct spi_device *spi)
/* supported modes are the same as our parent's */
ctlr->mode_bits = spi->controller->mode_bits;
ctlr->flags = spi->controller->flags;
+ ctlr->bits_per_word_mask = spi->controller->bits_per_word_mask;
ctlr->transfer_one_message = spi_mux_transfer_one_message;
ctlr->setup = spi_mux_setup;
ctlr->num_chipselect = mux_control_states(priv->mux);
ctlr->bus_num = -1;
ctlr->dev.of_node = spi->dev.of_node;
ctlr->must_async = true;
+ ctlr->defer_optimize_message = true;
ret = devm_spi_register_controller(&spi->dev, ctlr);
if (ret)
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 7e3083b83534..002f29dbcea6 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1277,24 +1277,11 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr,
/*
* Check if this transfer contains only one word;
- * OR contains 1 to 4 words, with bits_per_word == 8 and no delay between each word
- * OR contains 1 to 2 words, with bits_per_word == 16 and no delay between each word
- *
- * If one of the two last case is true, this also change the bits_per_word of this
- * transfer to make it a bit faster.
- * It's not an issue to change the bits_per_word here even if the multi-mode is not
- * applicable for this message, the signal on the wire will be the same.
*/
if (bits_per_word < 8 && tr->len == 1) {
/* multi-mode is applicable, only one word (1..7 bits) */
- } else if (tr->word_delay.value == 0 && bits_per_word == 8 && tr->len <= 4) {
- /* multi-mode is applicable, only one "bigger" word (8,16,24,32 bits) */
- tr->bits_per_word = tr->len * bits_per_word;
- } else if (tr->word_delay.value == 0 && bits_per_word == 16 && tr->len <= 2) {
- /* multi-mode is applicable, only one "bigger" word (16,32 bits) */
- tr->bits_per_word = tr->len * bits_per_word / 2;
} else if (bits_per_word >= 8 && tr->len == bits_per_word / 8) {
- /* multi-mode is applicable, only one word (9..15,17..32 bits) */
+ /* multi-mode is applicable, only one word (8..32 bits) */
} else {
/* multi-mode is not applicable: more than one word in the transfer */
mcspi->use_multi_mode = false;
diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index f1e922fd362a..955c920c4b63 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -349,7 +349,7 @@ static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi)
static int stm32_qspi_get_mode(u8 buswidth)
{
- if (buswidth == 4)
+ if (buswidth >= 4)
return CCR_BUSWIDTH_4;
return buswidth;
@@ -653,9 +653,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
return -EINVAL;
mode = spi->mode & (SPI_TX_OCTAL | SPI_RX_OCTAL);
- if ((mode == SPI_TX_OCTAL || mode == SPI_RX_OCTAL) ||
- ((mode == (SPI_TX_OCTAL | SPI_RX_OCTAL)) &&
- gpiod_count(qspi->dev, "cs") == -ENOENT)) {
+ if (mode && gpiod_count(qspi->dev, "cs") == -ENOENT) {
dev_err(qspi->dev, "spi-rx-bus-width\\/spi-tx-bus-width\\/cs-gpios\n");
dev_err(qspi->dev, "configuration not supported\n");
@@ -676,10 +674,10 @@ static int stm32_qspi_setup(struct spi_device *spi)
qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
/*
- * Dual flash mode is only enable in case SPI_TX_OCTAL and SPI_TX_OCTAL
- * are both set in spi->mode and "cs-gpios" properties is found in DT
+ * Dual flash mode is only enabled when SPI_TX_OCTAL or SPI_RX_OCTAL
+ * is set in spi->mode and the "cs-gpios" property is found in DT
*/
- if (mode == (SPI_TX_OCTAL | SPI_RX_OCTAL)) {
+ if (mode) {
qspi->cr_reg |= CR_DFM;
dev_dbg(qspi->dev, "Dual flash mode enable");
}
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 9bc9fd10d538..0f04e832f9ec 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -689,10 +689,12 @@ static int __spi_add_device(struct spi_device *spi)
* Make sure that multiple logical CS doesn't map to the same physical CS.
* For example, spi->chip_select[0] != spi->chip_select[1] and so on.
*/
- for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) {
- status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1);
- if (status)
- return status;
+ if (!spi_controller_is_target(ctlr)) {
+ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) {
+ status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1);
+ if (status)
+ return status;
+ }
}
/* Set the bus ID string */
@@ -2149,7 +2151,8 @@ static void __spi_unoptimize_message(struct spi_message *msg)
*/
static void spi_maybe_unoptimize_message(struct spi_message *msg)
{
- if (!msg->pre_optimized && msg->optimized)
+ if (!msg->pre_optimized && msg->optimized &&
+ !msg->spi->controller->defer_optimize_message)
__spi_unoptimize_message(msg);
}
@@ -4156,7 +4159,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
return -EINVAL;
if (xfer->tx_nbits != SPI_NBITS_SINGLE &&
xfer->tx_nbits != SPI_NBITS_DUAL &&
- xfer->tx_nbits != SPI_NBITS_QUAD)
+ xfer->tx_nbits != SPI_NBITS_QUAD &&
+ xfer->tx_nbits != SPI_NBITS_OCTAL)
return -EINVAL;
if ((xfer->tx_nbits == SPI_NBITS_DUAL) &&
!(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD)))
@@ -4171,7 +4175,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
return -EINVAL;
if (xfer->rx_nbits != SPI_NBITS_SINGLE &&
xfer->rx_nbits != SPI_NBITS_DUAL &&
- xfer->rx_nbits != SPI_NBITS_QUAD)
+ xfer->rx_nbits != SPI_NBITS_QUAD &&
+ xfer->rx_nbits != SPI_NBITS_OCTAL)
return -EINVAL;
if ((xfer->rx_nbits == SPI_NBITS_DUAL) &&
!(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD)))
@@ -4290,6 +4295,11 @@ static int __spi_optimize_message(struct spi_device *spi,
static int spi_maybe_optimize_message(struct spi_device *spi,
struct spi_message *msg)
{
+ if (spi->controller->defer_optimize_message) {
+ msg->spi = spi;
+ return 0;
+ }
+
if (msg->pre_optimized)
return 0;
@@ -4320,6 +4330,13 @@ int spi_optimize_message(struct spi_device *spi, struct spi_message *msg)
{
int ret;
+ /*
+ * Pre-optimization is not supported and optimization is deferred e.g.
+ * when using spi-mux.
+ */
+ if (spi->controller->defer_optimize_message)
+ return 0;
+
ret = __spi_optimize_message(spi, msg);
if (ret)
return ret;
@@ -4346,6 +4363,9 @@ EXPORT_SYMBOL_GPL(spi_optimize_message);
*/
void spi_unoptimize_message(struct spi_message *msg)
{
+ if (msg->spi->controller->defer_optimize_message)
+ return;
+
__spi_unoptimize_message(msg);
msg->pre_optimized = false;
}
@@ -4428,8 +4448,6 @@ int spi_async(struct spi_device *spi, struct spi_message *message)
spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
- spi_maybe_unoptimize_message(message);
-
return ret;
}
EXPORT_SYMBOL_GPL(spi_async);
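The spi.c hunks make spi_optimize_message() and spi_unoptimize_message() early-return when the controller sets defer_optimize_message, so optimization happens once under the mux's control instead of per caller. A tiny plain-C model of that gating (struct and function names are stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

struct controller { bool defer_optimize_message; };
struct message { bool optimized; bool pre_optimized; };

static int optimize(struct controller *c, struct message *m)
{
	if (c->defer_optimize_message)
		return 0;		/* deferred: nothing to do yet */
	m->optimized = true;
	m->pre_optimized = true;
	return 0;
}

static void unoptimize(struct controller *c, struct message *m)
{
	if (c->defer_optimize_message)
		return;			/* deferred: nothing was done */
	m->optimized = false;
	m->pre_optimized = false;
}

int main(void)
{
	struct controller mux = { .defer_optimize_message = true };
	struct message m = { 0 };

	optimize(&mux, &m);
	printf("optimized=%d\n", m.optimized);	/* 0: deferred */
	unoptimize(&mux, &m);
	return 0;
}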
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 69daeba974f2..5f518e5a9273 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -707,7 +707,7 @@ int vchiq_initialise(struct vchiq_state *state, struct vchiq_instance **instance
* block forever.
*/
for (i = 0; i < VCHIQ_INIT_RETRIES; i++) {
- if (state)
+ if (vchiq_remote_initialised(state))
break;
usleep_range(500, 600);
}
@@ -1202,7 +1202,7 @@ void vchiq_dump_platform_instances(struct vchiq_state *state, struct seq_file *f
{
int i;
- if (!state)
+ if (!vchiq_remote_initialised(state))
return;
/*
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
index 8af209e34fb2..382ec08f6a14 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
@@ -413,6 +413,11 @@ struct vchiq_state {
struct opaque_platform_state *platform_state;
};
+static inline bool vchiq_remote_initialised(const struct vchiq_state *state)
+{
+ return state->remote && state->remote->initialised;
+}
+
struct bulk_waiter {
struct vchiq_bulk *bulk;
struct completion event;
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c
index 1f74d0bb33ba..d5f7f61c5626 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c
@@ -138,7 +138,7 @@ void vchiq_debugfs_deinit(void)
#else /* CONFIG_DEBUG_FS */
-void vchiq_debugfs_init(void)
+void vchiq_debugfs_init(struct vchiq_state *state)
{
}
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
index 3c63347d2d08..430f2ed2ccd3 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
@@ -1170,6 +1170,11 @@ static int vchiq_open(struct inode *inode, struct file *file)
dev_dbg(state->dev, "arm: vchiq open\n");
+ if (!vchiq_remote_initialised(state)) {
+ dev_dbg(state->dev, "arm: vchiq has no connection to VideoCore\n");
+ return -ENOTCONN;
+ }
+
instance = kzalloc(sizeof(*instance), GFP_KERNEL);
if (!instance)
return -ENOMEM;
@@ -1200,7 +1205,7 @@ static int vchiq_release(struct inode *inode, struct file *file)
dev_dbg(state->dev, "arm: instance=%p\n", instance);
- if (!state) {
+ if (!vchiq_remote_initialised(state)) {
ret = -EPERM;
goto out;
}
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 7f6ca8177845..a3e09adc4e76 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -148,35 +148,38 @@ static int iblock_configure_device(struct se_device *dev)
dev->dev_attrib.is_nonrot = 1;
bi = bdev_get_integrity(bd);
- if (bi) {
- struct bio_set *bs = &ib_dev->ibd_bio_set;
-
- if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") ||
- !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) {
- pr_err("IBLOCK export of blk_integrity: %s not"
- " supported\n", bi->profile->name);
- ret = -ENOSYS;
- goto out_blkdev_put;
- }
+ if (!bi)
+ return 0;
- if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-CRC")) {
- dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT;
- } else if (!strcmp(bi->profile->name, "T10-DIF-TYPE1-CRC")) {
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_IP:
+ pr_err("IBLOCK export of blk_integrity: %s not supported\n",
+ blk_integrity_profile_name(bi));
+ ret = -ENOSYS;
+ goto out_blkdev_put;
+ case BLK_INTEGRITY_CSUM_CRC:
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE1_PROT;
- }
+ else
+ dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT;
+ break;
+ default:
+ break;
+ }
- if (dev->dev_attrib.pi_prot_type) {
- if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) {
- pr_err("Unable to allocate bioset for PI\n");
- ret = -ENOMEM;
- goto out_blkdev_put;
- }
- pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
- &bs->bio_integrity_pool);
+ if (dev->dev_attrib.pi_prot_type) {
+ struct bio_set *bs = &ib_dev->ibd_bio_set;
+
+ if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) {
+ pr_err("Unable to allocate bioset for PI\n");
+ ret = -ENOMEM;
+ goto out_blkdev_put;
}
- dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
+ pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
+ &bs->bio_integrity_pool);
}
+ dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
return 0;
out_blkdev_put:
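The rewritten block maps the generic integrity description onto DIF protection types: IP checksums are rejected, and for CRC the presence of a reference tag selects Type 1 over Type 3. A plain-C model of that decision table (the enum and names are stand-ins, not the block-layer API):

#include <stdio.h>

enum csum_type { CSUM_NONE, CSUM_IP, CSUM_CRC };	/* stand-in values */

static const char *pick_prot(enum csum_type csum, int has_ref_tag)
{
	switch (csum) {
	case CSUM_IP:
		return "unsupported";	/* IP checksum profiles are rejected */
	case CSUM_CRC:
		/* A reference tag implies Type 1, otherwise Type 3. */
		return has_ref_tag ? "TYPE1" : "TYPE3";
	default:
		return "none";
	}
}

int main(void)
{
	printf("%s %s %s\n", pick_prot(CSUM_CRC, 1),	/* TYPE1 */
	       pick_prot(CSUM_CRC, 0),			/* TYPE3 */
	       pick_prot(CSUM_IP, 0));			/* unsupported */
	return 0;
}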
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index 3235e1c719e8..3e73efa51bba 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -660,7 +660,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev,
const struct ffa_ops *ops)
{
const struct ffa_msg_ops *msg_ops = ops->msg_ops;
- struct ffa_send_direct_data data = { OPTEE_FFA_GET_API_VERSION };
+ struct ffa_send_direct_data data = {
+ .data0 = OPTEE_FFA_GET_API_VERSION,
+ };
int rc;
msg_ops->mode_32bit_set(ffa_dev);
@@ -677,7 +679,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev,
return false;
}
- data = (struct ffa_send_direct_data){ OPTEE_FFA_GET_OS_VERSION };
+ data = (struct ffa_send_direct_data){
+ .data0 = OPTEE_FFA_GET_OS_VERSION,
+ };
rc = msg_ops->sync_send_receive(ffa_dev, &data);
if (rc) {
pr_err("Unexpected error %d\n", rc);
@@ -698,7 +702,9 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
unsigned int *rpc_param_count,
unsigned int *max_notif_value)
{
- struct ffa_send_direct_data data = { OPTEE_FFA_EXCHANGE_CAPABILITIES };
+ struct ffa_send_direct_data data = {
+ .data0 = OPTEE_FFA_EXCHANGE_CAPABILITIES,
+ };
int rc;
rc = ops->msg_ops->sync_send_receive(ffa_dev, &data);
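The ffa_abi hunks replace positional initializers with designated ones because positional init breaks silently if a field is ever added in front of data0. A runnable plain-C demonstration with a stand-in struct (the hypothetical leading field shows the failure mode):

#include <stdio.h>

/* stand-in for struct ffa_send_direct_data after a hypothetical
 * new field is added in front of data0 */
struct msg {
	unsigned long flags;	/* newly added field */
	unsigned long data0;
};

int main(void)
{
	struct msg a = { 42 };			/* positional: now sets flags! */
	struct msg b = { .data0 = 42 };		/* designated: still sets data0 */

	printf("a.data0=%lu a.flags=%lu\n", a.data0, a.flags);	/* 0, 42 */
	printf("b.data0=%lu b.flags=%lu\n", b.data0, b.flags);	/* 42, 0 */
	return 0;
}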
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 45f04a25255a..1b2345a697c5 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz)
return;
}
+ if (!params->trip_max)
+ return;
+
allocate_power(tz, params->trip_max->temperature);
params->update_cdevs = true;
}
diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c
index 65974fe8be0d..fd5527188cf9 100644
--- a/drivers/thermal/gov_step_wise.c
+++ b/drivers/thermal/gov_step_wise.c
@@ -55,7 +55,11 @@ static unsigned long get_target_state(struct thermal_instance *instance,
if (cur_state <= instance->lower)
return THERMAL_NO_TARGET;
- return clamp(cur_state - 1, instance->lower, instance->upper);
+ /*
+ * If 'throttle' is false, no mitigation is necessary, so
+ * request the lower state for this instance.
+ */
+ return instance->lower;
}
return instance->target;
@@ -93,23 +97,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz,
if (instance->initialized && old_target == instance->target)
continue;
- if (trip->type == THERMAL_TRIP_PASSIVE) {
- /*
- * If the target state for this thermal instance
- * changes from THERMAL_NO_TARGET to something else,
- * ensure that the zone temperature will be updated
- * (assuming enabled passive cooling) until it becomes
- * THERMAL_NO_TARGET again, or the cooling device may
- * not be reset to its initial state.
- */
- if (old_target == THERMAL_NO_TARGET &&
- instance->target != THERMAL_NO_TARGET)
- tz->passive++;
- else if (old_target != THERMAL_NO_TARGET &&
- instance->target == THERMAL_NO_TARGET)
- tz->passive--;
- }
-
instance->initialized = true;
mutex_lock(&instance->cdev->lock);
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 14e34eabc419..4a1bfebb1b8e 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -150,7 +150,7 @@ static irqreturn_t proc_thermal_irq_handler(int irq, void *devid)
{
struct proc_thermal_pci *pci_info = devid;
struct proc_thermal_device *proc_priv;
- int ret = IRQ_HANDLED;
+ int ret = IRQ_NONE;
u32 status;
proc_priv = pci_info->proc_priv;
@@ -175,6 +175,7 @@ static irqreturn_t proc_thermal_irq_handler(int irq, void *devid)
/* Disable enable interrupt flag */
proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
pkg_thermal_schedule_work(&pci_info->work);
+ ret = IRQ_HANDLED;
}
pci_write_config_byte(pci_info->pdev, 0xdc, 0x01);
diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c
index 0bb3a495b56e..819ed0110f3e 100644
--- a/drivers/thermal/mediatek/lvts_thermal.c
+++ b/drivers/thermal/mediatek/lvts_thermal.c
@@ -769,7 +769,11 @@ static int lvts_golden_temp_init(struct device *dev, u8 *calib,
*/
gt = (((u32 *)calib)[0] >> lvts_data->gt_calib_bit_offset) & 0xff;
- if (gt && gt < LVTS_GOLDEN_TEMP_MAX)
+ /* A zero value for gt means the device has invalid efuse data */
+ if (!gt)
+ return -ENODATA;
+
+ if (gt < LVTS_GOLDEN_TEMP_MAX)
golden_temp = gt;
golden_temp_offset = golden_temp * 500 + lvts_data->temp_offset;
@@ -1458,7 +1462,6 @@ static const struct lvts_ctrl_data mt8188_lvts_mcu_data_ctrl[] = {
},
VALID_SENSOR_MAP(1, 1, 1, 1),
.offset = 0x0,
- .mode = LVTS_MSR_FILTERED_MODE,
},
{
.lvts_sensor = {
@@ -1469,7 +1472,6 @@ static const struct lvts_ctrl_data mt8188_lvts_mcu_data_ctrl[] = {
},
VALID_SENSOR_MAP(1, 1, 0, 0),
.offset = 0x100,
- .mode = LVTS_MSR_FILTERED_MODE,
}
};
@@ -1483,7 +1485,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = {
},
VALID_SENSOR_MAP(0, 1, 0, 0),
.offset = 0x0,
- .mode = LVTS_MSR_FILTERED_MODE,
},
{
.lvts_sensor = {
@@ -1496,7 +1497,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = {
},
VALID_SENSOR_MAP(1, 1, 1, 0),
.offset = 0x100,
- .mode = LVTS_MSR_FILTERED_MODE,
},
{
.lvts_sensor = {
@@ -1507,7 +1507,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = {
},
VALID_SENSOR_MAP(1, 1, 0, 0),
.offset = 0x200,
- .mode = LVTS_MSR_FILTERED_MODE,
},
{
.lvts_sensor = {
@@ -1518,7 +1517,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = {
},
VALID_SENSOR_MAP(1, 1, 0, 0),
.offset = 0x300,
- .mode = LVTS_MSR_FILTERED_MODE,
}
};
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d70e76dd3c94..ecc748d15eb7 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies);
else if (tz->polling_delay_jiffies)
thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies);
+ else if (tz->temperature == THERMAL_TEMP_INVALID)
+ thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS));
}
static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz)
@@ -482,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz,
thermal_governor_trip_crossed(governor, tz, trip, crossed_up);
}
-static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a,
+static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a,
const struct list_head *b)
{
struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc,
notify_list_node);
struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc,
notify_list_node);
- int ret = tdb->notify_temp - tda->notify_temp;
-
- return ascending ? ret : -ret;
+ return tda->notify_temp - tdb->notify_temp;
}
void __thermal_zone_device_update(struct thermal_zone_device *tz,
@@ -511,7 +511,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
update_temperature(tz);
if (tz->temperature == THERMAL_TEMP_INVALID)
- return;
+ goto monitor;
__thermal_zone_set_trips(tz);
@@ -520,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
for_each_trip_desc(tz, td)
handle_thermal_trip(tz, td, &way_up_list, &way_down_list);
- list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp);
+ list_sort(NULL, &way_up_list, thermal_trip_notify_cmp);
list_for_each_entry(td, &way_up_list, notify_list_node)
thermal_trip_crossed(tz, &td->trip, governor, true);
list_sort(NULL, &way_down_list, thermal_trip_notify_cmp);
- list_for_each_entry(td, &way_down_list, notify_list_node)
+ list_for_each_entry_reverse(td, &way_down_list, notify_list_node)
thermal_trip_crossed(tz, &td->trip, governor, false);
if (governor->manage)
@@ -533,6 +533,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
thermal_debug_update_trip_stats(tz);
+monitor:
monitor_thermal_zone(tz);
}
@@ -1406,6 +1407,7 @@ thermal_zone_device_register_with_trips(const char *type,
ida_init(&tz->ida);
mutex_init(&tz->lock);
init_completion(&tz->removal);
+ init_completion(&tz->resume);
id = ida_alloc(&thermal_tz_ida, GFP_KERNEL);
if (id < 0) {
result = id;
@@ -1651,6 +1653,9 @@ static void thermal_zone_device_resume(struct work_struct *work)
thermal_zone_device_init(tz);
__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+ complete(&tz->resume);
+ tz->resuming = false;
+
mutex_unlock(&tz->lock);
}
@@ -1668,6 +1673,20 @@ static int thermal_pm_notify(struct notifier_block *nb,
list_for_each_entry(tz, &thermal_tz_list, node) {
mutex_lock(&tz->lock);
+ if (tz->resuming) {
+ /*
+ * thermal_zone_device_resume() queued up for
+ * this zone has not acquired the lock yet, so
+ * release it to let the function run and wait
+ * until it has done the work.
+ */
+ mutex_unlock(&tz->lock);
+
+ wait_for_completion(&tz->resume);
+
+ mutex_lock(&tz->lock);
+ }
+
tz->suspended = true;
mutex_unlock(&tz->lock);
@@ -1685,6 +1704,9 @@ static int thermal_pm_notify(struct notifier_block *nb,
cancel_delayed_work(&tz->poll_queue);
+ reinit_completion(&tz->resume);
+ tz->resuming = true;
+
/*
* Replace the work function with the resume one, which
* will restore the original work function and schedule
@@ -1709,6 +1731,12 @@ static int thermal_pm_notify(struct notifier_block *nb,
static struct notifier_block thermal_pm_nb = {
.notifier_call = thermal_pm_notify,
+ /*
+ * Run at the lowest priority to avoid interference between the thermal
+ * zone resume work items spawned by thermal_pm_notify() and the other
+ * PM notifiers.
+ */
+ .priority = INT_MIN,
};
static int __init thermal_init(void)
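The comparator change removes the ascending/descending switch: both lists are now sorted ascending, and the "way down" list is simply walked in reverse. A userspace analogue with qsort() (the temperatures are example values):

#include <stdio.h>
#include <stdlib.h>

static int cmp_asc(const void *a, const void *b)
{
	return *(const int *)a - *(const int *)b;	/* ascending only */
}

int main(void)
{
	int temps[] = { 70, 40, 90 };
	int n = sizeof(temps) / sizeof(temps[0]), i;

	qsort(temps, n, sizeof(int), cmp_asc);

	for (i = 0; i < n; i++)		/* trips crossed on the way up */
		printf("up %d\n", temps[i]);
	for (i = n - 1; i >= 0; i--)	/* same order reversed on the way down */
		printf("down %d\n", temps[i]);
	return 0;
}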
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 20e7b45673d6..94eeb4011a48 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -55,6 +55,7 @@ struct thermal_governor {
* @type: the thermal zone device type
* @device: &struct device for this thermal zone
* @removal: removal completion
+ * @resume: resume completion
* @trip_temp_attrs: attributes for trip points for sysfs: trip temperature
* @trip_type_attrs: attributes for trip points for sysfs: trip type
* @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis
@@ -89,6 +90,7 @@ struct thermal_governor {
* @poll_queue: delayed work for polling
* @notify_event: Last notification event
* @suspended: thermal zone suspend indicator
+ * @resuming: indicates whether or not thermal zone resume is in progress
* @trips: array of struct thermal_trip objects
*/
struct thermal_zone_device {
@@ -96,6 +98,7 @@ struct thermal_zone_device {
char type[THERMAL_NAME_LENGTH];
struct device device;
struct completion removal;
+ struct completion resume;
struct attribute_group trips_attribute_group;
struct thermal_attr *trip_temp_attrs;
struct thermal_attr *trip_type_attrs;
@@ -123,12 +126,19 @@ struct thermal_zone_device {
struct delayed_work poll_queue;
enum thermal_notify_event notify_event;
bool suspended;
+ bool resuming;
#ifdef CONFIG_THERMAL_DEBUGFS
struct thermal_debugfs *debugfs;
#endif
struct thermal_trip_desc trips[] __counted_by(num_trips);
};
+/*
+ * Default delay after a failing thermal zone temperature check before
+ * attempting to check it again.
+ */
+#define THERMAL_RECHECK_DELAY_MS 250
+
/* Default Thermal Governor */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
#define DEFAULT_THERMAL_GOVERNOR "step_wise"
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 458bb1280ebf..5b97e420a95f 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -288,7 +288,7 @@ struct mxser_board {
enum mxser_must_hwid must_hwid;
speed_t max_baud;
- struct mxser_port ports[] __counted_by(nports);
+ struct mxser_port ports[] /* __counted_by(nports) */;
};
static DECLARE_BITMAP(mxser_boards, MXSER_BOARDS);
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index ff15022369e4..b0adafc44747 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -15,7 +15,6 @@
*/
#include <linux/acpi.h>
-#include <linux/cleanup.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/ioport.h>
@@ -42,8 +41,6 @@
#include <asm/irq.h>
-#include "../serial_base.h" /* For serial_base_add_isa_preferred_console() */
-
#include "8250.h"
/*
@@ -563,8 +560,6 @@ static void __init serial8250_isa_init_ports(void)
port->irqflags |= irqflag;
if (serial8250_isa_config != NULL)
serial8250_isa_config(i, &up->port, &up->capabilities);
-
- serial_base_add_isa_preferred_console(serial8250_reg.dev_name, i);
}
}
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 170639d12b2a..1af9aed99c65 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -115,6 +115,10 @@
/* RX FIFO occupancy indicator */
#define UART_OMAP_RX_LVL 0x19
+/* Timeout low and High */
+#define UART_OMAP_TO_L 0x26
+#define UART_OMAP_TO_H 0x27
+
/*
* Copy of the genpd flags for the console.
* Only used if console suspend is disabled
@@ -663,13 +667,25 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
/*
* On K3 SoCs, it is observed that RX TIMEOUT is signalled after
- * FIFO has been drained, in which case a dummy read of RX FIFO
- * is required to clear RX TIMEOUT condition.
+ * the FIFO has been drained, or is signalled erroneously.
+ * Apply the solution for Errata i2310 as described in
+ * https://www.ti.com/lit/pdf/sprz536
*/
if (priv->habit & UART_RX_TIMEOUT_QUIRK &&
(iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT &&
serial_port_in(port, UART_OMAP_RX_LVL) == 0) {
- serial_port_in(port, UART_RX);
+ unsigned char efr2, timeout_h, timeout_l;
+
+ efr2 = serial_in(up, UART_OMAP_EFR2);
+ timeout_h = serial_in(up, UART_OMAP_TO_H);
+ timeout_l = serial_in(up, UART_OMAP_TO_L);
+ serial_out(up, UART_OMAP_TO_H, 0xFF);
+ serial_out(up, UART_OMAP_TO_L, 0xFF);
+ serial_out(up, UART_OMAP_EFR2, UART_OMAP_EFR2_TIMEOUT_BEHAVE);
+ serial_in(up, UART_IIR);
+ serial_out(up, UART_OMAP_EFR2, efr2);
+ serial_out(up, UART_OMAP_TO_H, timeout_h);
+ serial_out(up, UART_OMAP_TO_L, timeout_l);
}
/* Stop processing interrupts on input overrun */
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 40af74b55933..e1d7aa2fa347 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1985,6 +1985,17 @@ enum {
MOXA_SUPP_RS485 = BIT(2),
};
+static unsigned short moxa_get_nports(unsigned short device)
+{
+ switch (device) {
+ case PCI_DEVICE_ID_MOXA_CP116E_A_A:
+ case PCI_DEVICE_ID_MOXA_CP116E_A_B:
+ return 8;
+ }
+
+ return FIELD_GET(0x00F0, device);
+}
+
static bool pci_moxa_is_mini_pcie(unsigned short device)
{
if (device == PCI_DEVICE_ID_MOXA_CP102N ||
@@ -2038,7 +2049,7 @@ static int pci_moxa_init(struct pci_dev *dev)
{
unsigned short device = dev->device;
resource_size_t iobar_addr = pci_resource_start(dev, 2);
- unsigned int num_ports = (device & 0x00F0) >> 4, i;
+ unsigned int i, num_ports = moxa_get_nports(device);
u8 val, init_mode = MOXA_RS232;
if (!(pci_moxa_supported_rs(dev) & MOXA_SUPP_RS232)) {
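moxa_get_nports() keeps the long-standing convention that bits 7..4 of the device ID encode the port count, and special-cases the CP116E_A variants whose IDs do not follow it. FIELD_GET(0x00F0, device) is equivalent to the shift below (0x1141 is a made-up example ID):

#include <stdio.h>

static unsigned short get_nports(unsigned short device)
{
	return (device & 0x00F0) >> 4;	/* what FIELD_GET(0x00F0, ...) computes */
}

int main(void)
{
	printf("%u\n", get_nports(0x1141));	/* prints 4 */
	return 0;
}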
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 34801a6f300b..b88cc28c94e3 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -308,8 +308,8 @@ static void bcm_uart_do_tx(struct uart_port *port)
val = bcm_uart_readl(port, UART_MCTL_REG);
val = (val & UART_MCTL_TXFIFOFILL_MASK) >> UART_MCTL_TXFIFOFILL_SHIFT;
-
- pending = uart_port_tx_limited(port, ch, port->fifosize - val,
+ pending = uart_port_tx_limited_flags(port, ch, UART_TX_NOSTOP,
+ port->fifosize - val,
true,
bcm_uart_writel(port, ch, UART_FIFO_REG),
({}));
@@ -320,6 +320,9 @@ static void bcm_uart_do_tx(struct uart_port *port)
val = bcm_uart_readl(port, UART_IR_REG);
val &= ~UART_TX_INT_MASK;
bcm_uart_writel(port, val, UART_IR_REG);
+
+ if (uart_tx_stopped(port))
+ bcm_uart_stop_tx(port);
}
/*
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 2eb22594960f..ff32cd2d2863 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -120,6 +120,7 @@
#define UCR4_OREN (1<<1) /* Receiver overrun interrupt enable */
#define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */
#define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */
+#define UFCR_RXTL_MASK 0x3F /* Receiver trigger 6 bits wide */
#define UFCR_DCEDTE (1<<6) /* DCE/DTE mode select */
#define UFCR_RFDIV (7<<7) /* Reference freq divider mask */
#define UFCR_RFDIV_REG(x) (((x) < 7 ? 6 - (x) : 6) << 7)
@@ -1551,6 +1552,7 @@ static void imx_uart_shutdown(struct uart_port *port)
struct imx_port *sport = (struct imx_port *)port;
unsigned long flags;
u32 ucr1, ucr2, ucr4, uts;
+ int loops;
if (sport->dma_is_enabled) {
dmaengine_terminate_sync(sport->dma_chan_tx);
@@ -1613,6 +1615,56 @@ static void imx_uart_shutdown(struct uart_port *port)
ucr4 &= ~UCR4_TCEN;
imx_uart_writel(sport, ucr4, UCR4);
+ /*
+ * We have to ensure the tx state machine ends up in OFF. This
+ * is especially important for rs485 where we must not leave
+ * the RTS signal high, blocking the bus indefinitely.
+ *
+ * All interrupts are now disabled, so imx_uart_stop_tx() will
+ * no longer be called from imx_uart_transmit_buffer(). It may
+ * still be called via the hrtimers, and if those are in play,
+ * we have to honour the delays.
+ */
+ if (sport->tx_state == WAIT_AFTER_RTS || sport->tx_state == SEND)
+ imx_uart_stop_tx(port);
+
+ /*
+ * In many cases (rs232 mode, or if tx_state was
+ * WAIT_AFTER_RTS, or if tx_state was SEND and there is no
+ * delay_rts_after_send), this will have moved directly to
+ * OFF. In rs485 mode, tx_state might already have been
+ * WAIT_AFTER_SEND and the hrtimer thus already started, or
+ * the above imx_uart_stop_tx() call could have started it. In
+ * those cases, we have to wait for the hrtimer to fire and
+ * complete the transition to OFF.
+ */
+ loops = port->rs485.flags & SER_RS485_ENABLED ?
+ port->rs485.delay_rts_after_send : 0;
+ while (sport->tx_state != OFF && loops--) {
+ uart_port_unlock_irqrestore(&sport->port, flags);
+ msleep(1);
+ uart_port_lock_irqsave(&sport->port, &flags);
+ }
+
+ if (sport->tx_state != OFF) {
+ dev_warn(sport->port.dev, "unexpected tx_state %d\n",
+ sport->tx_state);
+ /*
+ * This machine may be busted, but ensure the RTS
+ * signal is inactive in order not to block other
+ * devices.
+ */
+ if (port->rs485.flags & SER_RS485_ENABLED) {
+ ucr2 = imx_uart_readl(sport, UCR2);
+ if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
+ imx_uart_rts_active(sport, &ucr2);
+ else
+ imx_uart_rts_inactive(sport, &ucr2);
+ imx_uart_writel(sport, ucr2, UCR2);
+ }
+ sport->tx_state = OFF;
+ }
+
uart_port_unlock_irqrestore(&sport->port, flags);
clk_disable_unprepare(sport->clk_per);
@@ -1933,7 +1985,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio
struct serial_rs485 *rs485conf)
{
struct imx_port *sport = (struct imx_port *)port;
- u32 ucr2;
+ u32 ucr2, ufcr;
if (rs485conf->flags & SER_RS485_ENABLED) {
/* Enable receiver if low-active RTS signal is requested */
@@ -1952,8 +2004,13 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio
/* Make sure Rx is enabled in case Tx is active with Rx disabled */
if (!(rs485conf->flags & SER_RS485_ENABLED) ||
- rs485conf->flags & SER_RS485_RX_DURING_TX)
+ rs485conf->flags & SER_RS485_RX_DURING_TX) {
+ /* If the receiver trigger is 0, set it to a default value */
+ ufcr = imx_uart_readl(sport, UFCR);
+ if ((ufcr & UFCR_RXTL_MASK) == 0)
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
imx_uart_start_rx(port);
+ }
return 0;
}
diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c
index 19f0a305cc43..3b4206e815fe 100644
--- a/drivers/tty/serial/ma35d1_serial.c
+++ b/drivers/tty/serial/ma35d1_serial.c
@@ -688,12 +688,13 @@ static int ma35d1serial_probe(struct platform_device *pdev)
struct uart_ma35d1_port *up;
int ret = 0;
- if (pdev->dev.of_node) {
- ret = of_alias_get_id(pdev->dev.of_node, "serial");
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to get alias/pdev id, errno %d\n", ret);
- return ret;
- }
+ if (!pdev->dev.of_node)
+ return -ENODEV;
+
+ ret = of_alias_get_id(pdev->dev.of_node, "serial");
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to get alias/pdev id, errno %d\n", ret);
+ return ret;
}
up = &ma35d1serial_ports[ret];
up->port.line = ret;
diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c
index b0604d6da025..58858dd352c5 100644
--- a/drivers/tty/serial/mcf.c
+++ b/drivers/tty/serial/mcf.c
@@ -462,7 +462,7 @@ static const struct uart_ops mcf_uart_ops = {
.verify_port = mcf_verify_port,
};
-static struct mcf_uart mcf_ports[4];
+static struct mcf_uart mcf_ports[10];
#define MCF_MAXPORTS ARRAY_SIZE(mcf_ports)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 2bd25afe0d92..69a632fefc41 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -649,15 +649,25 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport)
{
+ unsigned char c;
u32 irq_en;
- if (qcom_geni_serial_main_active(uport) ||
- !qcom_geni_serial_tx_empty(uport))
- return;
+ /*
+ * Start a new transfer in case the previous command was cancelled and
+ * left data in the FIFO which may prevent the watermark interrupt
+ * from triggering. Note that the stale data is discarded.
+ */
+ if (!qcom_geni_serial_main_active(uport) &&
+ !qcom_geni_serial_tx_empty(uport)) {
+ if (uart_fifo_out(uport, &c, 1) == 1) {
+ writel(M_CMD_DONE_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+ qcom_geni_serial_setup_tx(uport, 1);
+ writel(c, uport->membase + SE_GENI_TX_FIFOn);
+ }
+ }
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
irq_en |= M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN;
-
writel(DEF_TX_WM, uport->membase + SE_GENI_TX_WATERMARK_REG);
writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
}
@@ -665,13 +675,17 @@ static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport)
static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
{
u32 irq_en;
- struct qcom_geni_serial_port *port = to_dev_port(uport);
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
irq_en &= ~(M_CMD_DONE_EN | M_TX_FIFO_WATERMARK_EN);
writel(0, uport->membase + SE_GENI_TX_WATERMARK_REG);
writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
- /* Possible stop tx is called multiple times. */
+}
+
+static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
if (!qcom_geni_serial_main_active(uport))
return;
@@ -684,6 +698,8 @@ static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
}
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+
+ port->tx_remaining = 0;
}
static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
@@ -862,7 +878,7 @@ static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport,
memset(buf, 0, sizeof(buf));
tx_bytes = min(remaining, BYTES_PER_FIFO_WORD);
- tx_bytes = uart_fifo_out(uport, buf, tx_bytes);
+ uart_fifo_out(uport, buf, tx_bytes);
iowrite32_rep(uport->membase + SE_GENI_TX_FIFOn, buf, 1);
@@ -890,13 +906,17 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
else
pending = kfifo_len(&tport->xmit_fifo);
- /* All data has been transmitted and acknowledged as received */
- if (!pending && !status && done) {
+ /* All data has been transmitted or command has been cancelled */
+ if (!pending && done) {
qcom_geni_serial_stop_tx_fifo(uport);
goto out_write_wakeup;
}
- avail = port->tx_fifo_depth - (status & TX_FIFO_WC);
+ if (active)
+ avail = port->tx_fifo_depth - (status & TX_FIFO_WC);
+ else
+ avail = port->tx_fifo_depth;
+
avail *= BYTES_PER_FIFO_WORD;
chunk = min(avail, pending);
@@ -1069,11 +1089,15 @@ static void qcom_geni_serial_shutdown(struct uart_port *uport)
{
disable_irq(uport->irq);
- if (uart_console(uport))
- return;
-
qcom_geni_serial_stop_tx(uport);
qcom_geni_serial_stop_rx(uport);
+
+ qcom_geni_serial_cancel_tx_cmd(uport);
+}
+
+static void qcom_geni_serial_flush_buffer(struct uart_port *uport)
+{
+ qcom_geni_serial_cancel_tx_cmd(uport);
}
static int qcom_geni_serial_port_setup(struct uart_port *uport)
@@ -1532,6 +1556,7 @@ static const struct uart_ops qcom_geni_console_pops = {
.request_port = qcom_geni_serial_request_port,
.config_port = qcom_geni_serial_config_port,
.shutdown = qcom_geni_serial_shutdown,
+ .flush_buffer = qcom_geni_serial_flush_buffer,
.type = qcom_geni_serial_get_type,
.set_mctrl = qcom_geni_serial_set_mctrl,
.get_mctrl = qcom_geni_serial_get_mctrl,
diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h
index 743a72ac34f3..b6c38d2edfd4 100644
--- a/drivers/tty/serial/serial_base.h
+++ b/drivers/tty/serial/serial_base.h
@@ -49,33 +49,3 @@ void serial_ctrl_unregister_port(struct uart_driver *drv, struct uart_port *port
int serial_core_register_port(struct uart_driver *drv, struct uart_port *port);
void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port);
-
-#ifdef CONFIG_SERIAL_CORE_CONSOLE
-
-int serial_base_add_preferred_console(struct uart_driver *drv,
- struct uart_port *port);
-
-#else
-
-static inline
-int serial_base_add_preferred_console(struct uart_driver *drv,
- struct uart_port *port)
-{
- return 0;
-}
-
-#endif
-
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-
-int serial_base_add_isa_preferred_console(const char *name, int idx);
-
-#else
-
-static inline
-int serial_base_add_isa_preferred_console(const char *name, int idx)
-{
- return 0;
-}
-
-#endif
diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c
index 73c6ee540c83..4df2a4b10445 100644
--- a/drivers/tty/serial/serial_base_bus.c
+++ b/drivers/tty/serial/serial_base_bus.c
@@ -8,7 +8,6 @@
* The serial core bus manages the serial core controller instances.
*/
-#include <linux/cleanup.h>
#include <linux/container_of.h>
#include <linux/device.h>
#include <linux/idr.h>
@@ -205,134 +204,6 @@ void serial_base_port_device_remove(struct serial_port_device *port_dev)
put_device(&port_dev->dev);
}
-#ifdef CONFIG_SERIAL_CORE_CONSOLE
-
-static int serial_base_add_one_prefcon(const char *match, const char *dev_name,
- int port_id)
-{
- int ret;
-
- ret = add_preferred_console_match(match, dev_name, port_id);
- if (ret == -ENOENT)
- return 0;
-
- return ret;
-}
-
-#ifdef __sparc__
-
-/* Handle Sparc ttya and ttyb options as done in console_setup() */
-static int serial_base_add_sparc_console(const char *dev_name, int idx)
-{
- const char *name;
-
- switch (idx) {
- case 0:
- name = "ttya";
- break;
- case 1:
- name = "ttyb";
- break;
- default:
- return 0;
- }
-
- return serial_base_add_one_prefcon(name, dev_name, idx);
-}
-
-#else
-
-static inline int serial_base_add_sparc_console(const char *dev_name, int idx)
-{
- return 0;
-}
-
-#endif
-
-static int serial_base_add_prefcon(const char *name, int idx)
-{
- const char *char_match __free(kfree) = NULL;
- const char *nmbr_match __free(kfree) = NULL;
- int ret;
-
- /* Handle ttyS specific options */
- if (strstarts(name, "ttyS")) {
- /* No name, just a number */
- nmbr_match = kasprintf(GFP_KERNEL, "%i", idx);
- if (!nmbr_match)
- return -ENODEV;
-
- ret = serial_base_add_one_prefcon(nmbr_match, name, idx);
- if (ret)
- return ret;
-
- /* Sparc ttya and ttyb */
- ret = serial_base_add_sparc_console(name, idx);
- if (ret)
- return ret;
- }
-
- /* Handle the traditional character device name style console=ttyS0 */
- char_match = kasprintf(GFP_KERNEL, "%s%i", name, idx);
- if (!char_match)
- return -ENOMEM;
-
- return serial_base_add_one_prefcon(char_match, name, idx);
-}
-
-/**
- * serial_base_add_preferred_console - Adds a preferred console
- * @drv: Serial port device driver
- * @port: Serial port instance
- *
- * Tries to add a preferred console for a serial port if specified in the
- * kernel command line. Supports both the traditional character device such
- * as console=ttyS0, and a hardware addressing based console=DEVNAME:0.0
- * style name.
- *
- * Translates the kernel command line option using a hardware based addressing
- * console=DEVNAME:0.0 to the serial port character device such as ttyS0.
- * Cannot be called early for ISA ports, depends on struct device.
- *
- * Note that duplicates are ignored by add_preferred_console().
- *
- * Return: 0 on success, negative error code on failure.
- */
-int serial_base_add_preferred_console(struct uart_driver *drv,
- struct uart_port *port)
-{
- const char *port_match __free(kfree) = NULL;
- int ret;
-
- ret = serial_base_add_prefcon(drv->dev_name, port->line);
- if (ret)
- return ret;
-
- port_match = kasprintf(GFP_KERNEL, "%s:%i.%i", dev_name(port->dev),
- port->ctrl_id, port->port_id);
- if (!port_match)
- return -ENOMEM;
-
- /* Translate a hardware addressing style console=DEVNAME:0.0 */
- return serial_base_add_one_prefcon(port_match, drv->dev_name, port->line);
-}
-
-#endif
-
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-
-/*
- * Early ISA ports initialize the console before there is no struct device.
- * This should be only called from serial8250_isa_init_preferred_console(),
- * other callers are likely wrong and should rely on earlycon instead.
- */
-int serial_base_add_isa_preferred_console(const char *name, int idx)
-{
- return serial_base_add_prefcon(name, idx);
-}
-
-#endif
-
static int serial_base_init(void)
{
int ret;
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0c4d60976663..2a8006e3d687 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3422,10 +3422,6 @@ int serial_core_register_port(struct uart_driver *drv, struct uart_port *port)
if (ret)
goto err_unregister_ctrl_dev;
- ret = serial_base_add_preferred_console(drv, port);
- if (ret)
- goto err_unregister_port_dev;
-
ret = serial_core_add_one_port(drv, port);
if (ret)
goto err_unregister_port_dev;
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 8944548c30fa..c532416aec22 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -105,16 +105,15 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
* @hba: per adapter instance
* @req: pointer to the request to be issued
*
- * Return: the hardware queue instance on which the request would
- * be queued.
+ * Return: the hardware queue instance on which the request will be or has
+ * been queued. %NULL if the request has already been freed.
*/
struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba,
struct request *req)
{
- u32 utag = blk_mq_unique_tag(req);
- u32 hwq = blk_mq_unique_tag_to_hwq(utag);
+ struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx);
- return &hba->uhq[hwq];
+ return hctx ? &hba->uhq[hctx->queue_num] : NULL;
}
/**
@@ -515,6 +514,8 @@ int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag)
if (!cmd)
return -EINVAL;
hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+ if (!hwq)
+ return 0;
} else {
hwq = hba->dev_cmd_queue;
}
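The lookup now snapshots req->mq_hctx once and returns NULL when the request has already been freed; READ_ONCE() additionally stops the compiler from re-reading the pointer. A plain-C model of the snapshot-then-test idiom (struct names are stand-ins, and the concurrency guarantee is only modelled, not reproduced):

#include <stdio.h>

struct hctx { int queue_num; };
struct request { struct hctx *mq_hctx; };

static struct hctx *req_to_hwq(struct hctx *uhq, struct request *req)
{
	struct hctx *h = req->mq_hctx;	/* READ_ONCE() in the kernel */

	return h ? &uhq[h->queue_num] : NULL;	/* NULL: already freed */
}

int main(void)
{
	struct hctx queues[2] = { { 0 }, { 1 } };
	struct request live = { .mq_hctx = &queues[1] };
	struct request freed = { .mq_hctx = NULL };

	printf("%p %p\n", (void *)req_to_hwq(queues, &live),
	       (void *)req_to_hwq(queues, &freed));	/* second is (nil) */
	return 0;
}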
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index e5e9da61f15d..702fa1996992 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -5193,17 +5193,19 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth)
}
/**
- * ufshcd_slave_configure - adjust SCSI device configurations
+ * ufshcd_device_configure - adjust SCSI device configurations
* @sdev: pointer to SCSI device
+ * @lim: queue limits
*
* Return: 0 (success).
*/
-static int ufshcd_slave_configure(struct scsi_device *sdev)
+static int ufshcd_device_configure(struct scsi_device *sdev,
+ struct queue_limits *lim)
{
struct ufs_hba *hba = shost_priv(sdev->host);
struct request_queue *q = sdev->request_queue;
- blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
+ lim->dma_pad_mask = PRDT_DATA_BYTE_COUNT_PAD - 1;
/*
* Block runtime-pm until all consumers are added.
@@ -6456,6 +6458,8 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
/* Release cmd in MCQ mode if abort succeeds */
if (is_mcq_enabled(hba) && (*ret == 0)) {
hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+ if (!hwq)
+ return 0;
spin_lock_irqsave(&hwq->cq_lock, flags);
if (ufshcd_cmd_inflight(lrbp->cmd))
ufshcd_release_scsi_cmd(hba, lrbp);
@@ -8787,6 +8791,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
(hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH)) {
/* Reset the device and controller before doing reinit */
ufshcd_device_reset(hba);
+ ufs_put_device_desc(hba);
ufshcd_hba_stop(hba);
ufshcd_vops_reinit_notify(hba);
ret = ufshcd_hba_enable(hba);
@@ -8907,7 +8912,7 @@ static const struct scsi_host_template ufshcd_driver_template = {
.queuecommand = ufshcd_queuecommand,
.mq_poll = ufshcd_poll,
.slave_alloc = ufshcd_slave_alloc,
- .slave_configure = ufshcd_slave_configure,
+ .device_configure = ufshcd_device_configure,
.slave_destroy = ufshcd_slave_destroy,
.change_queue_depth = ufshcd_change_queue_depth,
.eh_abort_handler = ufshcd_abort,
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index 4ce7cba2b48a..8f3b9a0a38e1 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -1131,6 +1131,7 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance,
struct cxacru_data *instance;
struct usb_device *usb_dev = interface_to_usbdev(intf);
struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD];
+ struct usb_endpoint_descriptor *in, *out;
int ret;
/* instance init */
@@ -1177,6 +1178,19 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance,
goto fail;
}
+ if (usb_endpoint_xfer_int(&cmd_ep->desc))
+ ret = usb_find_common_endpoints(intf->cur_altsetting,
+ NULL, NULL, &in, &out);
+ else
+ ret = usb_find_common_endpoints(intf->cur_altsetting,
+ &in, &out, NULL, NULL);
+
+ if (ret) {
+ usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
if ((cmd_ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
== USB_ENDPOINT_XFER_INT) {
usb_fill_int_urb(instance->rcv_urb,
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 3362af165ef5..880d52c0949d 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -291,6 +291,20 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
if (ifp->desc.bNumEndpoints >= num_ep)
goto skip_to_next_endpoint_or_interface_descriptor;
+ /* Save a copy of the descriptor and use it instead of the original */
+ endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints];
+ memcpy(&endpoint->desc, d, n);
+ d = &endpoint->desc;
+
+ /* Clear the reserved bits in bEndpointAddress */
+ i = d->bEndpointAddress &
+ (USB_ENDPOINT_DIR_MASK | USB_ENDPOINT_NUMBER_MASK);
+ if (i != d->bEndpointAddress) {
+ dev_notice(ddev, "config %d interface %d altsetting %d has an endpoint descriptor with address 0x%X, changing to 0x%X\n",
+ cfgno, inum, asnum, d->bEndpointAddress, i);
+ endpoint->desc.bEndpointAddress = i;
+ }
+
/* Check for duplicate endpoint addresses */
if (config_endpoint_is_duplicate(config, inum, asnum, d)) {
dev_notice(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n",
@@ -308,10 +322,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
}
}
- endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints];
+ /* Accept this endpoint */
++ifp->desc.bNumEndpoints;
-
- memcpy(&endpoint->desc, d, n);
INIT_LIST_HEAD(&endpoint->urb_list);
/*
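The sanitising step keeps only the direction bit and the endpoint number, clearing the reserved bits 6..4 of bEndpointAddress. A runnable demonstration with a made-up descriptor value:

#include <stdio.h>

#define USB_ENDPOINT_DIR_MASK		0x80
#define USB_ENDPOINT_NUMBER_MASK	0x0f

int main(void)
{
	unsigned char addr = 0xe1;	/* IN endpoint 1 with reserved bit 6 set */
	unsigned char fixed = addr &
		(USB_ENDPOINT_DIR_MASK | USB_ENDPOINT_NUMBER_MASK);

	printf("0x%02X -> 0x%02X\n", addr, fixed);	/* 0xE1 -> 0x81 */
	return 0;
}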
diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c
index f1a499ee482c..763e4122ed5b 100644
--- a/drivers/usb/core/of.c
+++ b/drivers/usb/core/of.c
@@ -84,9 +84,12 @@ static bool usb_of_has_devices_or_graph(const struct usb_device *hub)
if (of_graph_is_present(np))
return true;
- for_each_child_of_node(np, child)
- if (of_property_present(child, "reg"))
+ for_each_child_of_node(np, child) {
+ if (of_property_present(child, "reg")) {
+ of_node_put(child);
return true;
+ }
+ }
return false;
}
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index b4783574b8e6..13171454f959 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -506,6 +506,9 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x1b1c, 0x1b38), .driver_info = USB_QUIRK_DELAY_INIT |
USB_QUIRK_DELAY_CTRL_MSG },
+ /* START BP-850k Printer */
+ { USB_DEVICE(0x1bc3, 0x0003), .driver_info = USB_QUIRK_NO_SET_INTF },
+
/* MIDI keyboard WORLDE MINI */
{ USB_DEVICE(0x1c75, 0x0204), .driver_info =
USB_QUIRK_CONFIG_INTF_STRINGS },
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 7ee61a89520b..cb82557678dd 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -957,12 +957,16 @@ static bool dwc3_core_is_valid(struct dwc3 *dwc)
static void dwc3_core_setup_global_control(struct dwc3 *dwc)
{
+ unsigned int power_opt;
+ unsigned int hw_mode;
u32 reg;
reg = dwc3_readl(dwc->regs, DWC3_GCTL);
reg &= ~DWC3_GCTL_SCALEDOWN_MASK;
+ hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0);
+ power_opt = DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1);
- switch (DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1)) {
+ switch (power_opt) {
case DWC3_GHWPARAMS1_EN_PWROPT_CLK:
/**
* WORKAROUND: DWC3 revisions between 2.10a and 2.50a have an
@@ -995,6 +999,20 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc)
break;
}
+ /*
+ * This is a workaround for STAR#4846132, which only affects
+ * DWC_usb31 version 2.00a operating in host mode.
+ *
+ * There is a problem in DWC_usb31 version 2.00a operating in
+ * host mode that can cause a CSR read timeout when a CSR read
+ * coincides with RAM clock gating entry. Disable clock gating
+ * to ensure normal operation, at the cost of extra power
+ * consumption.
+ */
+ if (power_opt != DWC3_GHWPARAMS1_EN_PWROPT_NO &&
+ hw_mode != DWC3_GHWPARAMS0_MODE_GADGET && DWC3_VER_IS(DWC31, 200A))
+ reg |= DWC3_GCTL_DSBLCLKGTNG;
+
/* check if current dwc3 is on simulation board */
if (dwc->hwparams.hwparams6 & DWC3_GHWPARAMS6_EN_FPGA) {
dev_info(dwc->dev, "Running with FPGA optimizations\n");
@@ -2250,7 +2268,6 @@ assert_reset:
static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
- unsigned long flags;
u32 reg;
int i;
@@ -2293,9 +2310,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- spin_lock_irqsave(&dwc->lock, flags);
dwc3_gadget_suspend(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
synchronize_irq(dwc->irq_gadget);
}
@@ -2312,7 +2327,6 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
{
- unsigned long flags;
int ret;
u32 reg;
int i;
@@ -2366,9 +2380,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) {
dwc3_otg_host_init(dwc);
} else if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- spin_lock_irqsave(&dwc->lock, flags);
dwc3_gadget_resume(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
}
break;
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 9ef821ca2fc7..052852f80146 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -54,6 +54,10 @@
#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
+#define PCI_DEVICE_ID_INTEL_PTLH 0xe332
+#define PCI_DEVICE_ID_INTEL_PTLH_PCH 0xe37e
+#define PCI_DEVICE_ID_INTEL_PTLU 0xe432
+#define PCI_DEVICE_ID_INTEL_PTLU_PCH 0xe47e
#define PCI_DEVICE_ID_AMD_MR 0x163a
#define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
@@ -430,6 +434,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) },
+ { PCI_DEVICE_DATA(INTEL, PTLH, &dwc3_pci_intel_swnode) },
+ { PCI_DEVICE_DATA(INTEL, PTLH_PCH, &dwc3_pci_intel_swnode) },
+ { PCI_DEVICE_DATA(INTEL, PTLU, &dwc3_pci_intel_swnode) },
+ { PCI_DEVICE_DATA(INTEL, PTLU_PCH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) },
{ PCI_DEVICE_DATA(AMD, MR, &dwc3_pci_amd_mr_swnode) },
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index ce3cfa1f36f5..0e7c1e947c0a 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -115,9 +115,12 @@ static int usb_string_copy(const char *s, char **s_copy)
int ret;
char *str;
char *copy = *s_copy;
+
ret = strlen(s);
if (ret > USB_MAX_STRING_LEN)
return -EOVERFLOW;
+ if (ret < 1)
+ return -EINVAL;
if (copy) {
str = copy;
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index ba7d180cc9e6..44e20c6c36d3 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -213,6 +213,7 @@ static inline struct usb_endpoint_descriptor *ep_desc(struct usb_gadget *gadget,
struct usb_endpoint_descriptor *ss)
{
switch (gadget->speed) {
+ case USB_SPEED_SUPER_PLUS:
case USB_SPEED_SUPER:
return ss;
case USB_SPEED_HIGH:
@@ -449,11 +450,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
mutex_lock(&dev->lock_printer_io);
spin_lock_irqsave(&dev->lock, flags);
- if (dev->interface < 0) {
- spin_unlock_irqrestore(&dev->lock, flags);
- mutex_unlock(&dev->lock_printer_io);
- return -ENODEV;
- }
+ if (dev->interface < 0)
+ goto out_disabled;
/* We will use this flag later to check if a printer reset happened
* after we turn interrupts back on.
@@ -461,6 +459,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
dev->reset_printer = 0;
setup_rx_reqs(dev);
+ /* this dropped the lock - need to retest */
+ if (dev->interface < 0)
+ goto out_disabled;
bytes_copied = 0;
current_rx_req = dev->current_rx_req;
@@ -494,6 +495,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
wait_event_interruptible(dev->rx_wait,
(likely(!list_empty(&dev->rx_buffers))));
spin_lock_irqsave(&dev->lock, flags);
+ if (dev->interface < 0)
+ goto out_disabled;
}
/* We have data to return then copy it to the caller's buffer.*/
@@ -537,6 +540,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
return -EAGAIN;
}
+ if (dev->interface < 0)
+ goto out_disabled;
+
/* If we are not returning all the data left in this RX request
* buffer then adjust the amount of data left in the buffer.
* Otherwise if we are done with this RX request buffer then
@@ -566,6 +572,11 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
return bytes_copied;
else
return -EAGAIN;
+
+out_disabled:
+ spin_unlock_irqrestore(&dev->lock, flags);
+ mutex_unlock(&dev->lock_printer_io);
+ return -ENODEV;
}
static ssize_t
@@ -586,11 +597,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
mutex_lock(&dev->lock_printer_io);
spin_lock_irqsave(&dev->lock, flags);
- if (dev->interface < 0) {
- spin_unlock_irqrestore(&dev->lock, flags);
- mutex_unlock(&dev->lock_printer_io);
- return -ENODEV;
- }
+ if (dev->interface < 0)
+ goto out_disabled;
/* Check if a printer reset happens while we have interrupts on */
dev->reset_printer = 0;
@@ -613,6 +621,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
wait_event_interruptible(dev->tx_wait,
(likely(!list_empty(&dev->tx_reqs))));
spin_lock_irqsave(&dev->lock, flags);
+ if (dev->interface < 0)
+ goto out_disabled;
}
while (likely(!list_empty(&dev->tx_reqs)) && len) {
@@ -662,6 +672,9 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
return -EAGAIN;
}
+ if (dev->interface < 0)
+ goto out_disabled;
+
list_add(&req->list, &dev->tx_reqs_active);
/* here, we unlock, and only unlock, to avoid deadlock. */
@@ -674,6 +687,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
mutex_unlock(&dev->lock_printer_io);
return -EAGAIN;
}
+ if (dev->interface < 0)
+ goto out_disabled;
}
spin_unlock_irqrestore(&dev->lock, flags);
@@ -685,6 +700,11 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
return bytes_copied;
else
return -EAGAIN;
+
+out_disabled:
+ spin_unlock_irqrestore(&dev->lock, flags);
+ mutex_unlock(&dev->lock_printer_io);
+ return -ENODEV;
}
static int
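
The printer_read()/printer_write() rework replaces each inline unlock-unlock-return sequence with a single out_disabled label and adds re-checks of dev->interface after every point where dev->lock was dropped. A minimal sketch of that single-exit pattern, reusing the driver's field names but with everything else elided:

static ssize_t example_io(struct printer_dev *dev)
{
	unsigned long flags;
	ssize_t bytes_copied = 0;

	mutex_lock(&dev->lock_printer_io);
	spin_lock_irqsave(&dev->lock, flags);

	if (dev->interface < 0)
		goto out_disabled;	/* every disconnect check funnels here */

	/*
	 * ... do the transfer; after any step that temporarily dropped
	 * dev->lock, dev->interface must be re-tested, since the
	 * interface can be torn down while the lock is not held ...
	 */

	spin_unlock_irqrestore(&dev->lock, flags);
	mutex_unlock(&dev->lock_printer_io);
	return bytes_copied;

out_disabled:
	spin_unlock_irqrestore(&dev->lock, flags);
	mutex_unlock(&dev->lock_printer_io);
	return -ENODEV;
}
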
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 11dd0b9e847f..95191083b455 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -1163,8 +1163,6 @@ struct net_device *gether_connect(struct gether *link)
if (netif_running(dev->net))
eth_start(dev, GFP_ATOMIC);
- netif_device_attach(dev->net);
-
/* on error, disable any endpoints */
} else {
(void) usb_ep_disable(link->out_ep);
@@ -1202,7 +1200,7 @@ void gether_disconnect(struct gether *link)
DBG(dev, "%s\n", __func__);
- netif_device_detach(dev->net);
+ netif_stop_queue(dev->net);
netif_carrier_off(dev->net);
/* disable endpoints, forcing (synchronous) completion
diff --git a/drivers/usb/gadget/udc/aspeed_udc.c b/drivers/usb/gadget/udc/aspeed_udc.c
index 3916c8e2ba01..821a6ab5da56 100644
--- a/drivers/usb/gadget/udc/aspeed_udc.c
+++ b/drivers/usb/gadget/udc/aspeed_udc.c
@@ -66,8 +66,8 @@
#define USB_UPSTREAM_EN BIT(0)
/* Main config reg */
-#define UDC_CFG_SET_ADDR(x) ((x) & 0x3f)
-#define UDC_CFG_ADDR_MASK (0x3f)
+#define UDC_CFG_SET_ADDR(x) ((x) & UDC_CFG_ADDR_MASK)
+#define UDC_CFG_ADDR_MASK GENMASK(6, 0)
/* Interrupt ctrl & status reg */
#define UDC_IRQ_EP_POOL_NAK BIT(17)
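
This is a behavior fix, not a cleanup: the old literal 0x3f masked only 6 bits, while GENMASK(6, 0) is 0x7f, covering the full 7-bit USB device address range. A runnable userspace demo of the arithmetic, with GENMASK re-derived locally to mirror the kernel macro from <linux/bits.h>:

#include <stdio.h>

#define GENMASK(h, l) \
	((~0UL << (l)) & (~0UL >> (sizeof(unsigned long) * 8 - 1 - (h))))

int main(void)
{
	printf("old mask 0x3f keeps %d addresses\n", 0x3f + 1);	/* 64 */
	printf("GENMASK(6, 0) = 0x%lx keeps %lu addresses\n",
	       GENMASK(6, 0), GENMASK(6, 0) + 1);		/* 0x7f, 128 */
	/* With the 6-bit mask, any assigned address above 63 was
	 * silently truncated. */
	return 0;
}
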
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 37eb37b0affa..0a8cf6c17f82 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1125,10 +1125,20 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg)
xhci_dbg(xhci, "Start the secondary HCD\n");
retval = xhci_run(xhci->shared_hcd);
}
-
+ if (retval)
+ return retval;
+ /*
+ * Resume roothubs unconditionally as PORTSC change bits are not
+ * immediately visible after xHC reset
+ */
hcd->state = HC_STATE_SUSPENDED;
- if (xhci->shared_hcd)
+
+ if (xhci->shared_hcd) {
xhci->shared_hcd->state = HC_STATE_SUSPENDED;
+ usb_hcd_resume_root_hub(xhci->shared_hcd);
+ }
+ usb_hcd_resume_root_hub(hcd);
+
goto done;
}
@@ -1152,7 +1162,6 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg)
xhci_dbc_resume(xhci);
- done:
if (retval == 0) {
/*
* Resume roothubs only if there are pending events.
@@ -1178,6 +1187,7 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg)
usb_hcd_resume_root_hub(hcd);
}
}
+done:
/*
* If system is subject to the Quirk, Compliance Mode Timer needs to
* be re-initialized always after a system resume. Ports are subject
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 8abf3a567e30..108d9a593a80 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -556,7 +556,7 @@ static int da8xx_probe(struct platform_device *pdev)
ret = of_platform_populate(pdev->dev.of_node, NULL,
da8xx_auxdata_lookup, &pdev->dev);
if (ret)
- return ret;
+ goto err_unregister_phy;
pinfo = da8xx_dev_info;
pinfo.parent = &pdev->dev;
@@ -571,9 +571,13 @@ static int da8xx_probe(struct platform_device *pdev)
ret = PTR_ERR_OR_ZERO(glue->musb);
if (ret) {
dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
- usb_phy_generic_unregister(glue->usb_phy);
+ goto err_unregister_phy;
}
+ return 0;
+
+err_unregister_phy:
+ usb_phy_generic_unregister(glue->usb_phy);
return ret;
}
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 8b0308d84270..85697466b147 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1737,6 +1737,49 @@ static void mos7840_port_remove(struct usb_serial_port *port)
kfree(mos7840_port);
}
+static int mos7840_suspend(struct usb_serial *serial, pm_message_t message)
+{
+ struct moschip_port *mos7840_port;
+ struct usb_serial_port *port;
+ int i;
+
+ for (i = 0; i < serial->num_ports; ++i) {
+ port = serial->port[i];
+ if (!tty_port_initialized(&port->port))
+ continue;
+
+ mos7840_port = usb_get_serial_port_data(port);
+
+ usb_kill_urb(mos7840_port->read_urb);
+ mos7840_port->read_urb_busy = false;
+ }
+
+ return 0;
+}
+
+static int mos7840_resume(struct usb_serial *serial)
+{
+ struct moschip_port *mos7840_port;
+ struct usb_serial_port *port;
+ int res;
+ int i;
+
+ for (i = 0; i < serial->num_ports; ++i) {
+ port = serial->port[i];
+ if (!tty_port_initialized(&port->port))
+ continue;
+
+ mos7840_port = usb_get_serial_port_data(port);
+
+ mos7840_port->read_urb_busy = true;
+ res = usb_submit_urb(mos7840_port->read_urb, GFP_NOIO);
+ if (res)
+ mos7840_port->read_urb_busy = false;
+ }
+
+ return 0;
+}
+
static struct usb_serial_driver moschip7840_4port_device = {
.driver = {
.owner = THIS_MODULE,
@@ -1764,6 +1807,8 @@ static struct usb_serial_driver moschip7840_4port_device = {
.port_probe = mos7840_port_probe,
.port_remove = mos7840_port_remove,
.read_bulk_callback = mos7840_bulk_in_callback,
+ .suspend = mos7840_suspend,
+ .resume = mos7840_resume,
};
static struct usb_serial_driver * const serial_drivers[] = {
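
The new callbacks implement the usual USB serial suspend/resume handshake: kill the bulk-in URB of every initialized port before suspend, resubmit it on resume, and keep the read_urb_busy flag in sync. Reduced to one URB, the pattern looks like this sketch (GFP_NOIO because the resume path must not recurse into block I/O that might depend on the resuming device):

#include <linux/usb.h>

static void example_suspend_one(struct urb *read_urb, bool *busy)
{
	usb_kill_urb(read_urb);		/* cancel and wait for completion */
	*busy = false;
}

static void example_resume_one(struct urb *read_urb, bool *busy)
{
	*busy = true;			/* mark busy before resubmitting */
	if (usb_submit_urb(read_urb, GFP_NOIO))
		*busy = false;		/* submission failed: clear again */
}
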
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 8a5846d4adf6..311040f9b935 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1425,6 +1425,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */
.driver_info = NCTRL(0) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x3000, 0xff), /* Telit FN912 */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x3001, 0xff), /* Telit FN912 */
+ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff), /* Telit LE910-S1 (RNDIS) */
.driver_info = NCTRL(2) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */
@@ -1433,6 +1437,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(2) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */
.driver_info = NCTRL(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x9000, 0xff), /* Telit generic core-dump device */
+ .driver_info = NCTRL(0) },
{ USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */
.driver_info = NCTRL(0) | ZLP },
{ USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */
@@ -2224,6 +2230,10 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7106_2COM, 0x02, 0x02, 0x01) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) },
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7126, 0xff, 0x00, 0x00),
+ .driver_info = NCTRL(2) },
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7127, 0xff, 0x00, 0x00),
+ .driver_info = NCTRL(2) | NCTRL(3) | NCTRL(4) },
{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MPL200),
.driver_info = RSVD(1) | RSVD(4) },
@@ -2284,6 +2294,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0f0, 0xff), /* Foxconn T99W373 MBIM */
.driver_info = RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe145, 0xff), /* Foxconn T99W651 RNDIS */
+ .driver_info = RSVD(5) | RSVD(6) },
{ USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */
.driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
{ USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */
@@ -2321,6 +2333,32 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */
.driver_info = RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0802, 0xff), /* Rolling RW350-GL (laptop MBIM) */
+ .driver_info = RSVD(5) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for Global SKU */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for China SKU */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for SA */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for EU */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for NA */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for China EDU */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global EDU */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) },
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index b31464740f6c..8c8b5e6041cc 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -79,6 +79,12 @@ static int slave_alloc (struct scsi_device *sdev)
if (us->protocol == USB_PR_BULK && us->max_lun > 0)
sdev->sdev_bflags |= BLIST_FORCELUN;
+ /*
+ * Some USB storage devices reset if the IO advice hints grouping mode
+ * page is queried. Hence skip that mode page.
+ */
+ sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS;
+
return 0;
}
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index a48870a87a29..b610a2de4ae5 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -21,6 +21,7 @@
#include <scsi/scsi.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_devinfo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
@@ -820,6 +821,12 @@ static int uas_slave_alloc(struct scsi_device *sdev)
struct uas_dev_info *devinfo =
(struct uas_dev_info *)sdev->host->hostdata;
+ /*
+ * Some USB storage devices reset if the IO advice hints grouping mode
+ * page is queried. Hence skip that mode page.
+ */
+ sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS;
+
sdev->hostdata = devinfo;
return 0;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 8d112c3edae5..adf32ca0f761 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -25,6 +25,7 @@ struct ucsi_acpi {
unsigned long flags;
#define UCSI_ACPI_COMMAND_PENDING 1
#define UCSI_ACPI_ACK_PENDING 2
+#define UCSI_ACPI_CHECK_BOGUS_EVENT 3
guid_t guid;
u64 cmd;
};
@@ -128,6 +129,58 @@ static const struct ucsi_operations ucsi_zenbook_ops = {
.async_write = ucsi_acpi_async_write
};
+static int ucsi_gram_read(struct ucsi *ucsi, unsigned int offset,
+ void *val, size_t val_len)
+{
+ u16 bogus_change = UCSI_CONSTAT_POWER_LEVEL_CHANGE |
+ UCSI_CONSTAT_PDOS_CHANGE;
+ struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ struct ucsi_connector_status *status;
+ int ret;
+
+ ret = ucsi_acpi_read(ucsi, offset, val, val_len);
+ if (ret < 0)
+ return ret;
+
+ if (UCSI_COMMAND(ua->cmd) == UCSI_GET_CONNECTOR_STATUS &&
+ test_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags) &&
+ offset == UCSI_MESSAGE_IN) {
+ status = (struct ucsi_connector_status *)val;
+
+ /* Clear the bogus change */
+ if (status->change == bogus_change)
+ status->change = 0;
+
+ clear_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags);
+ }
+
+ return ret;
+}
+
+static int ucsi_gram_sync_write(struct ucsi *ucsi, unsigned int offset,
+ const void *val, size_t val_len)
+{
+ struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ int ret;
+
+ ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
+ if (ret < 0)
+ return ret;
+
+ if (UCSI_COMMAND(ua->cmd) == UCSI_GET_PDOS &&
+ ua->cmd & UCSI_GET_PDOS_PARTNER_PDO(1) &&
+ ua->cmd & UCSI_GET_PDOS_SRC_PDOS)
+ set_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags);
+
+ return ret;
+}
+
+static const struct ucsi_operations ucsi_gram_ops = {
+ .read = ucsi_gram_read,
+ .sync_write = ucsi_gram_sync_write,
+ .async_write = ucsi_acpi_async_write
+};
+
static const struct dmi_system_id ucsi_acpi_quirks[] = {
{
.matches = {
@@ -136,6 +189,14 @@ static const struct dmi_system_id ucsi_acpi_quirks[] = {
},
.driver_data = (void *)&ucsi_zenbook_ops,
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "LG gram PC"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "90Q"),
+ },
+ .driver_data = (void *)&ucsi_gram_ops,
+ },
{ }
};
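
The LG gram entry extends the same DMI quirk table the Zenbook already uses; each match hands back an alternative ucsi_operations via driver_data. A sketch of how such a table is typically consumed at probe time (the fallback name ucsi_acpi_ops is an assumption; the real probe code may differ in detail):

#include <linux/dmi.h>

static const struct ucsi_operations *example_pick_ops(void)
{
	const struct dmi_system_id *id = dmi_first_match(ucsi_acpi_quirks);

	/* No quirk entry matched: stay on the default operations. */
	return id ? id->driver_data : &ucsi_acpi_ops;
}
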
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 985a880e86da..2fa973afe4e6 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -372,6 +372,7 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev,
ret = fwnode_property_read_u32(fwnode, "reg", &port);
if (ret < 0) {
dev_err(dev, "missing reg property of %pOFn\n", fwnode);
+ fwnode_handle_put(fwnode);
return ret;
}
@@ -386,9 +387,11 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev,
if (!desc)
continue;
- if (IS_ERR(desc))
+ if (IS_ERR(desc)) {
+ fwnode_handle_put(fwnode);
return dev_err_probe(dev, PTR_ERR(desc),
"unable to acquire orientation gpio\n");
+ }
ucsi->port_orientation[port] = desc;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c
index ac48b7763114..ac69288e8bb0 100644
--- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c
+++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c
@@ -65,6 +65,7 @@ struct ucsi_stm32g0 {
struct device *dev;
unsigned long flags;
#define COMMAND_PENDING 1
+#define ACK_PENDING 2
const char *fw_name;
struct ucsi *ucsi;
bool suspended;
@@ -396,9 +397,13 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const
size_t len)
{
struct ucsi_stm32g0 *g0 = ucsi_get_drvdata(ucsi);
+ bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI;
int ret;
- set_bit(COMMAND_PENDING, &g0->flags);
+ if (ack)
+ set_bit(ACK_PENDING, &g0->flags);
+ else
+ set_bit(COMMAND_PENDING, &g0->flags);
ret = ucsi_stm32g0_async_write(ucsi, offset, val, len);
if (ret)
@@ -406,9 +411,14 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const
if (!wait_for_completion_timeout(&g0->complete, msecs_to_jiffies(5000)))
ret = -ETIMEDOUT;
+ else
+ return 0;
out_clear_bit:
- clear_bit(COMMAND_PENDING, &g0->flags);
+ if (ack)
+ clear_bit(ACK_PENDING, &g0->flags);
+ else
+ clear_bit(COMMAND_PENDING, &g0->flags);
return ret;
}
@@ -429,8 +439,9 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data)
if (UCSI_CCI_CONNECTOR(cci))
ucsi_connector_change(g0->ucsi, UCSI_CCI_CONNECTOR(cci));
- if (test_bit(COMMAND_PENDING, &g0->flags) &&
- cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE))
+ if (cci & UCSI_CCI_ACK_COMPLETE && test_and_clear_bit(ACK_PENDING, &g0->flags))
+ complete(&g0->complete);
+ if (cci & UCSI_CCI_COMMAND_COMPLETE && test_and_clear_bit(COMMAND_PENDING, &g0->flags))
complete(&g0->complete);
return IRQ_HANDLED;
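
The point of the split is that one completion plus one flag could not distinguish an ACK completion from a command completion, so a stray ACK could wake a waiter that had queued a command. With two bits, the writer claims exactly one before issuing the request, and the IRQ handler completes only if it can atomically clear that same bit. A sketch of both halves, assuming hypothetical flags/done storage (the UCSI_CCI_* masks come from the driver's ucsi.h):

#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

#define COMMAND_PENDING	1
#define ACK_PENDING	2

static int example_sync_write(unsigned long *flags, struct completion *done,
			      bool ack)
{
	int bit = ack ? ACK_PENDING : COMMAND_PENDING;

	set_bit(bit, flags);
	/* ... issue the write to the controller here ... */
	if (!wait_for_completion_timeout(done, msecs_to_jiffies(5000))) {
		clear_bit(bit, flags);	/* timed out: reclaim our bit */
		return -ETIMEDOUT;
	}
	return 0;	/* on success the IRQ side already cleared the bit */
}

static void example_irq(unsigned long *flags, struct completion *done, u32 cci)
{
	if ((cci & UCSI_CCI_ACK_COMPLETE) &&
	    test_and_clear_bit(ACK_PENDING, flags))
		complete(done);
	if ((cci & UCSI_CCI_COMMAND_COMPLETE) &&
	    test_and_clear_bit(COMMAND_PENDING, flags))
		complete(done);
}
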
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 987c7921affa..ba0ce0075b2f 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1260,7 +1260,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
struct vfio_pci_hot_reset_info hdr;
struct vfio_pci_fill_info fill = {};
bool slot = false;
- int ret, count;
+ int ret, count = 0;
if (copy_from_user(&hdr, arg, minsz))
return -EFAULT;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 85eea38dbdf4..2882944d23cc 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -257,6 +257,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL
config LENOVO_SE10_WDT
tristate "Lenovo SE10 Watchdog"
depends on (X86 && DMI) || COMPILE_TEST
+ depends on HAS_IOPORT
select WATCHDOG_CORE
help
If you say yes here you get support for the watchdog
diff --git a/drivers/watchdog/menz69_wdt.c b/drivers/watchdog/menz69_wdt.c
index c7de30270043..0508a65acfa6 100644
--- a/drivers/watchdog/menz69_wdt.c
+++ b/drivers/watchdog/menz69_wdt.c
@@ -161,6 +161,7 @@ static struct mcb_driver men_z069_driver = {
module_mcb_driver(men_z069_driver);
MODULE_AUTHOR("Johannes Thumshirn <[email protected]>");
+MODULE_DESCRIPTION("Watchdog driver for the MEN z069 IP-Core");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("mcb:16z069");
MODULE_IMPORT_NS(MCB);
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index a7a12f2fe9de..b6e0236509bb 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -370,5 +370,6 @@ static struct platform_driver omap_wdt_driver = {
module_platform_driver(omap_wdt_driver);
MODULE_AUTHOR("George G. Davis");
+MODULE_DESCRIPTION("Driver for the TI OMAP 16xx/24xx/34xx 32KHz (non-secure) watchdog");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:omap_wdt");
diff --git a/drivers/watchdog/simatic-ipc-wdt.c b/drivers/watchdog/simatic-ipc-wdt.c
index cdc1a2e15180..1e91f0a560ff 100644
--- a/drivers/watchdog/simatic-ipc-wdt.c
+++ b/drivers/watchdog/simatic-ipc-wdt.c
@@ -227,6 +227,7 @@ static struct platform_driver simatic_ipc_wdt_driver = {
module_platform_driver(simatic_ipc_wdt_driver);
+MODULE_DESCRIPTION("Siemens SIMATIC IPC driver for Watchdogs");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_AUTHOR("Gerd Haeussler <[email protected]>");
diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c
index 0099403f4992..24b1ad52102e 100644
--- a/drivers/watchdog/ts4800_wdt.c
+++ b/drivers/watchdog/ts4800_wdt.c
@@ -200,5 +200,6 @@ static struct platform_driver ts4800_wdt_driver = {
module_platform_driver(ts4800_wdt_driver);
MODULE_AUTHOR("Damien Riegel <[email protected]>");
+MODULE_DESCRIPTION("Watchdog driver for TS-4800 based boards");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:ts4800_wdt");
diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index 09d17e20f4a7..8c80d04811e4 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -118,6 +118,7 @@ static struct platform_driver twl4030_wdt_driver = {
module_platform_driver(twl4030_wdt_driver);
MODULE_AUTHOR("Nokia Corporation");
+MODULE_DESCRIPTION("TWL4030 Watchdog");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:twl4030_wdt");
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 15bb7989c387..3acf5e050072 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -512,7 +512,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque)
struct afs_vnode *vnode = AFS_FS_I(inode);
vnode->volume = as->volume;
- vnode->fid.vid = as->volume->vid,
+ vnode->fid.vid = as->volume->vid;
vnode->fid.vnode = 1;
vnode->fid.unique = 1;
inode->i_ino = 1;
@@ -545,7 +545,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key)
BUG_ON(!(inode->i_state & I_NEW));
vnode = AFS_FS_I(inode);
- vnode->cb_v_check = atomic_read(&as->volume->cb_v_break),
+ vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
afs_set_netfs_context(vnode);
op = afs_alloc_operation(key, as->volume);
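
Both afs hunks replace a trailing comma with a semicolon. Because of C's comma operator the original compiled cleanly and even behaved correctly; it merely chained the following statement into one expression, which is why no warning ever fired. A runnable demo of the pitfall:

#include <stdio.h>

int main(void)
{
	int a, b, c, d;

	a = 1,		/* comma operator: evaluate this ... */
	b = 2;		/* ... then this, all one expression statement */

	c = 1;		/* the intended, separate statements */
	d = 2;

	printf("%d %d %d %d\n", a, b, c, d);	/* prints: 1 2 1 2 */
	return 0;
}
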
diff --git a/fs/aio.c b/fs/aio.c
index 57c9f7c077e6..93ef59d358b3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1516,7 +1516,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
iocb_put(iocb);
}
-static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb, int rw_type)
{
int ret;
@@ -1542,7 +1542,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
} else
req->ki_ioprio = get_current_ioprio();
- ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+ ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags, rw_type);
if (unlikely(ret))
return ret;
@@ -1594,7 +1594,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
struct file *file;
int ret;
- ret = aio_prep_rw(req, iocb);
+ ret = aio_prep_rw(req, iocb, READ);
if (ret)
return ret;
file = req->ki_filp;
@@ -1621,7 +1621,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
struct file *file;
int ret;
- ret = aio_prep_rw(req, iocb);
+ ret = aio_prep_rw(req, iocb, WRITE);
if (ret)
return ret;
file = req->ki_filp;
diff --git a/fs/attr.c b/fs/attr.c
index 960a310581eb..825007d5cda4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -17,8 +17,6 @@
#include <linux/filelock.h>
#include <linux/security.h>
-#include "internal.h"
-
/**
* setattr_should_drop_sgid - determine whether the setgid bit needs to be
* removed
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index b5e4dfa04ed0..1d644a35ffa0 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -38,4 +38,5 @@ static void __exit exit_autofs_fs(void)
module_init(init_autofs_fs)
module_exit(exit_autofs_fs)
+MODULE_DESCRIPTION("Kernel automounter support");
MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 1f5db6863663..cf792d4de4f1 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -126,7 +126,7 @@ enum {
const struct fs_parameter_spec autofs_param_specs[] = {
fsparam_flag ("direct", Opt_direct),
fsparam_fd ("fd", Opt_fd),
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_flag ("ignore", Opt_ignore),
fsparam_flag ("indirect", Opt_indirect),
fsparam_u32 ("maxproto", Opt_maxproto),
@@ -134,7 +134,7 @@ const struct fs_parameter_spec autofs_param_specs[] = {
fsparam_flag ("offset", Opt_offset),
fsparam_u32 ("pgrp", Opt_pgrp),
fsparam_flag ("strictexpire", Opt_strictexpire),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -193,8 +193,6 @@ static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct autofs_fs_context *ctx = fc->fs_private;
struct autofs_sb_info *sbi = fc->s_fs_info;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, autofs_param_specs, param, &result);
@@ -205,16 +203,10 @@ static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_fd:
return autofs_parse_fd(fc, sbi, param, &result);
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return invalfc(fc, "Invalid uid");
- ctx->uid = uid;
+ ctx->uid = result.uid;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return invalfc(fc, "Invalid gid");
- ctx->gid = gid;
+ ctx->gid = result.gid;
break;
case Opt_pgrp:
ctx->pgrp = result.uint_32;
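
The conversion leans on the fsparam_uid()/fsparam_gid() parameter types, which make fs_parse() itself map and validate the ID against the caller's user namespace and return ready-made kuid_t/kgid_t values in result.uid/result.gid; the per-filesystem make_kuid()/uid_valid() boilerplate removed above becomes unnecessary. A sketch of a handler after such a conversion (ctx and the Opt_* constants are placeholders):

#include <linux/fs_parser.h>

static const struct fs_parameter_spec example_param_specs[] = {
	fsparam_uid("uid", Opt_uid),
	fsparam_gid("gid", Opt_gid),
	{}
};

static int example_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, example_param_specs, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_uid:
		ctx->uid = result.uid;	/* already a validated kuid_t */
		break;
	case Opt_gid:
		ctx->gid = result.gid;	/* already a validated kgid_t */
		break;
	}
	return 0;
}
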
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c4b6601f5b74..658f11aebda1 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -3,6 +3,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@@ -29,7 +30,7 @@
#include <linux/sched/task.h>
#include <linux/sort.h>
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
/* Persistent alloc info: */
@@ -259,6 +260,14 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
"invalid data type (got %u should be %u)",
a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
+ for (unsigned i = 0; i < 2; i++)
+ bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
+ c, err,
+ alloc_key_io_time_bad,
+ "invalid io_time[%s]: %llu, max %llu",
+ i == READ ? "read" : "write",
+ a.v->io_time[i], LRU_TIME_MAX);
+
switch (a.v->data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
@@ -757,8 +766,8 @@ int bch2_trigger_alloc(struct btree_trans *trans,
alloc_data_type_set(new_a, new_a->data_type);
if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
- new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
- new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
+ new_a->io_time[READ] = bch2_current_io_time(c, READ);
+ new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
}
@@ -768,6 +777,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+ alloc_data_type_set(new_a, new_a->data_type);
}
if (old_a->data_type != new_a->data_type ||
@@ -781,7 +791,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (new_a->data_type == BCH_DATA_cached &&
!new_a->io_time[READ])
- new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+ new_a->io_time[READ] = bch2_current_io_time(c, READ);
u64 old_lru = alloc_lru_idx_read(*old_a);
u64 new_lru = alloc_lru_idx_read(*new_a);
@@ -882,14 +892,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
closure_wake_up(&c->freelist_wait);
if (statechange(a->data_type == BCH_DATA_need_discard) &&
- !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
+ !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
bucket_flushed(new_a))
- bch2_discard_one_bucket_fast(c, new.k->p);
+ bch2_discard_one_bucket_fast(ca, new.k->p.offset);
if (statechange(a->data_type == BCH_DATA_cached) &&
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
- bch2_do_invalidates(c);
+ bch2_dev_do_invalidates(ca);
if (statechange(a->data_type == BCH_DATA_need_gc_gens))
bch2_gc_gens_async(c);
@@ -1544,13 +1554,13 @@ err:
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
- struct btree_iter *alloc_iter)
+ struct btree_iter *alloc_iter,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct btree_iter lru_iter;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
- struct bkey_s_c alloc_k, lru_k;
+ struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1564,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
+ if (a->fragmentation_lru) {
+ ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
+ a->fragmentation_lru,
+ alloc_k, last_flushed);
+ if (ret)
+ return ret;
+ }
+
if (a->data_type != BCH_DATA_cached)
return 0;
@@ -1579,7 +1597,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (ret)
goto err;
- a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+ a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
ret = bch2_trans_update(trans, alloc_iter,
&a_mut->k_i, BTREE_TRIGGER_norun);
if (ret)
@@ -1588,73 +1606,66 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}
- lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
- lru_pos(alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]), 0);
- ret = bkey_err(lru_k);
+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
+ alloc_k, last_flushed);
if (ret)
- return ret;
-
- if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
- alloc_key_to_missing_lru_entry,
- "missing lru entry\n"
- " %s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- ret = bch2_lru_set(trans,
- alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]);
- if (ret)
- goto err;
- }
+ goto err;
err:
fsck_err:
- bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
return ret;
}
-static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
int ret;
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i)
- if (bkey_eq(*i, bucket)) {
- ret = -EEXIST;
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i)
+ if (i->bucket == bucket) {
+ ret = -BCH_ERR_EEXIST_discard_in_flight_add;
goto out;
}
- ret = darray_push(&c->discard_buckets_in_flight, bucket);
+ ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+ .in_progress = in_progress,
+ .bucket = bucket,
+ }));
out:
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
return ret;
}
-static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i)
- if (bkey_eq(*i, bucket)) {
- darray_remove_item(&c->discard_buckets_in_flight, i);
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i)
+ if (i->bucket == bucket) {
+ BUG_ON(!i->in_progress);
+ darray_remove_item(&ca->discard_buckets_in_flight, i);
goto found;
}
BUG();
found:
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
struct discard_buckets_state {
@@ -1662,26 +1673,11 @@ struct discard_buckets_state {
u64 open;
u64 need_journal_commit;
u64 discarded;
- struct bch_dev *ca;
u64 need_journal_commit_this_dev;
};
-static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
-{
- if (s->ca == ca)
- return;
-
- if (s->ca && s->need_journal_commit_this_dev >
- bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
- bch2_journal_flush_async(&c->journal, NULL);
-
- if (s->ca)
- percpu_ref_put(&s->ca->io_ref);
- s->ca = ca;
- s->need_journal_commit_this_dev = 0;
-}
-
static int bch2_discard_one_bucket(struct btree_trans *trans,
+ struct bch_dev *ca,
struct btree_iter *need_discard_iter,
struct bpos *discard_pos_done,
struct discard_buckets_state *s)
@@ -1695,16 +1691,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
bool discard_locked = false;
int ret = 0;
- struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode
- ? s->ca
- : bch2_dev_get_ioref(c, pos.inode, WRITE);
- if (!ca) {
- bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
- return 0;
- }
-
- discard_buckets_next_dev(c, s, ca);
-
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
s->open++;
goto out;
@@ -1764,7 +1750,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto out;
}
- if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+ if (discard_in_flight_add(ca, iter.pos.offset, true))
goto out;
discard_locked = true;
@@ -1788,8 +1774,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
- alloc_data_type_set(&a->v, a->v.data_type);
write:
+ alloc_data_type_set(&a->v, a->v.data_type);
+
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
@@ -1801,7 +1788,7 @@ write:
s->discarded++;
out:
if (discard_locked)
- discard_in_flight_remove(c, iter.pos);
+ discard_in_flight_remove(ca, iter.pos.offset);
s->seen++;
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
@@ -1810,7 +1797,8 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+ struct bch_fs *c = ca->fs;
struct discard_buckets_state s = {};
struct bpos discard_pos_done = POS_MAX;
int ret;
@@ -1821,23 +1809,41 @@ static void bch2_do_discards_work(struct work_struct *work)
* successful commit:
*/
ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter,
- BTREE_ID_need_discard, POS_MIN, 0, k,
- bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
-
- discard_buckets_next_dev(c, &s, NULL);
+ for_each_btree_key_upto(trans, iter,
+ BTREE_ID_need_discard,
+ POS(ca->dev_idx, 0),
+ POS(ca->dev_idx, U64_MAX), 0, k,
+ bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_discards(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->discard_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
void bch2_do_discards(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
- !queue_work(c->write_ref_wq, &c->discard_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ for_each_member_device(c, ca)
+ bch2_dev_do_discards(ca);
}
static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
@@ -1866,68 +1872,69 @@ err:
static void bch2_do_discards_fast_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
+ struct bch_fs *c = ca->fs;
while (1) {
bool got_bucket = false;
- struct bpos bucket;
- struct bch_dev *ca;
-
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i) {
- if (i->snapshot)
- continue;
+ u64 bucket;
- ca = bch2_dev_get_ioref(c, i->inode, WRITE);
- if (!ca) {
- darray_remove_item(&c->discard_buckets_in_flight, i);
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i) {
+ if (i->in_progress)
continue;
- }
got_bucket = true;
- bucket = *i;
- i->snapshot = true;
+ bucket = i->bucket;
+ i->in_progress = true;
break;
}
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
if (!got_bucket)
break;
if (ca->mi.discard && !c->opts.nochanges)
blkdev_issue_discard(ca->disk_sb.bdev,
- bucket.offset * ca->mi.bucket_size,
+ bucket_to_sector(ca, bucket),
ca->mi.bucket_size,
GFP_KERNEL);
int ret = bch2_trans_do(c, NULL, NULL,
- BCH_WATERMARK_btree|
- BCH_TRANS_COMMIT_no_enospc,
- bch2_clear_bucket_needs_discard(trans, bucket));
+ BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
bch_err_fn(c, ret);
- percpu_ref_put(&ca->io_ref);
- discard_in_flight_remove(c, bucket);
+ discard_in_flight_remove(ca, bucket);
if (ret)
break;
}
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ percpu_ref_put(&ca->io_ref);
}
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
- rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode);
- bool dead = !ca || percpu_ref_is_dying(&ca->io_ref);
- rcu_read_unlock();
+ struct bch_fs *c = ca->fs;
+
+ if (discard_in_flight_add(ca, bucket, false))
+ return;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
- if (!dead &&
- !discard_in_flight_add(c, bucket) &&
- bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
- !queue_work(c->write_ref_wq, &c->discard_fast_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
static int invalidate_one_bucket(struct btree_trans *trans,
@@ -1975,8 +1982,8 @@ static int invalidate_one_bucket(struct btree_trans *trans,
a->v.data_type = 0;
a->v.dirty_sectors = 0;
a->v.cached_sectors = 0;
- a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
- a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
+ a->v.io_time[READ] = bch2_current_io_time(c, READ);
+ a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);
ret = bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
@@ -2011,9 +2018,25 @@ err:
goto out;
}
+static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
+ struct bch_dev *ca, bool *wrapped)
+{
+ struct bkey_s_c k;
+again:
+ k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
+ if (!k.k && !*wrapped) {
+ bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
+ *wrapped = true;
+ goto again;
+ }
+
+ return k;
+}
+
static void bch2_do_invalidates_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
+ struct bch_fs *c = ca->fs;
struct btree_trans *trans = bch2_trans_get(c);
int ret = 0;
@@ -2021,31 +2044,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
if (ret)
goto err;
- for_each_member_device(c, ca) {
- s64 nr_to_invalidate =
- should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+ s64 nr_to_invalidate =
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+ struct btree_iter iter;
+ bool wrapped = false;
- ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
- lru_pos(ca->dev_idx, 0, 0),
- lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
- BTREE_ITER_intent, k,
- invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
+ lru_pos(ca->dev_idx, 0,
+ ((bch2_current_io_time(c, READ) + U32_MAX) &
+ LRU_TIME_MAX)), 0);
- if (ret < 0) {
- bch2_dev_put(ca);
+ while (true) {
+ bch2_trans_begin(trans);
+
+ struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
break;
- }
+ if (!k.k)
+ break;
+
+ ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+ if (ret)
+ break;
+
+ bch2_btree_iter_advance(&iter);
}
+ bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_invalidates(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->invalidate_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
void bch2_do_invalidates(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
- !queue_work(c->write_ref_wq, &c->invalidate_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ for_each_member_device(c, ca)
+ bch2_dev_do_invalidates(ca);
}
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@@ -2204,7 +2259,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
if (ret)
return ret;
- now = atomic64_read(&c->io_clock[rw].now);
+ now = bch2_current_io_time(c, rw);
if (a->v.io_time[rw] == now)
goto out;
@@ -2361,16 +2416,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
set_bit(ca->dev_idx, c->rw_devs[i].d);
}
-void bch2_fs_allocator_background_exit(struct bch_fs *c)
+void bch2_dev_allocator_background_exit(struct bch_dev *ca)
+{
+ darray_exit(&ca->discard_buckets_in_flight);
+}
+
+void bch2_dev_allocator_background_init(struct bch_dev *ca)
{
- darray_exit(&c->discard_buckets_in_flight);
+ mutex_init(&ca->discard_buckets_in_flight_lock);
+ INIT_WORK(&ca->discard_work, bch2_do_discards_work);
+ INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
+ INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
- mutex_init(&c->discard_buckets_in_flight_lock);
- INIT_WORK(&c->discard_work, bch2_do_discards_work);
- INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
- INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
}
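
Moving the discard and invalidate work items from bch_fs to bch_dev settles on one queueing idiom, repeated in bch2_dev_do_discards() and bch2_dev_do_invalidates() above: take the device ioref first, then the filesystem write ref, then queue; each failure unwinds in reverse order, and the work function drops both references when it finishes. A condensed sketch of that idiom (the enum parameter name is an assumption about the write-ref type):

static void example_queue_dev_work(struct bch_dev *ca,
				   struct work_struct *work,
				   enum bch_write_ref ref)
{
	struct bch_fs *c = ca->fs;

	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
		return;				/* device not writable */

	if (!bch2_write_ref_tryget(c, ref))
		goto put_ioref;			/* fs shutting down / read-only */

	if (queue_work(c->write_ref_wq, work))
		return;				/* the work item now owns both refs */

	bch2_write_ref_put(c, ref);		/* already queued elsewhere: unwind */
put_ioref:
	percpu_ref_put(&ca->io_ref);
}
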
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index ae31a94be6f9..ba2c5557a3f0 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -141,7 +141,13 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
!bch2_bucket_sectors_fragmented(ca, a))
return 0;
- u64 d = bch2_bucket_sectors_dirty(a);
+ /*
+ * avoid overflowing LRU_TIME_BITS on a corrupted fs, when
+ * bucket_sectors_dirty is (much) bigger than bucket_size
+ */
+ u64 d = min(bch2_bucket_sectors_dirty(a),
+ ca->mi.bucket_size);
+
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
}
@@ -269,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
enum btree_iter_update_trigger_flags);
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
+void bch2_dev_do_discards(struct bch_dev *);
void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@@ -283,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
+void bch2_dev_do_invalidates(struct bch_dev *);
void bch2_do_invalidates(struct bch_fs *);
static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@@ -306,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
-void bch2_fs_allocator_background_exit(struct bch_fs *);
+void bch2_dev_allocator_background_exit(struct bch_dev *);
+void bch2_dev_allocator_background_init(struct bch_dev *);
+
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 927a5f300b30..27d97c22ae27 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -621,13 +621,13 @@ again:
avail = dev_buckets_free(ca, *usage, watermark);
if (usage->d[BCH_DATA_need_discard].buckets > avail)
- bch2_do_discards(c);
+ bch2_dev_do_discards(ca);
if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_gc_gens_async(c);
if (should_invalidate_buckets(ca, *usage))
- bch2_do_invalidates(c);
+ bch2_dev_do_invalidates(ca);
if (!avail) {
if (cl && !waiting) {
@@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
@@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 4321f9fb73bd..6d8b1bc90be0 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
return ret;
}
-static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
-{
- return bpos_eq(l.k->p, r.k->p) &&
- bkey_bytes(l.k) == bkey_bytes(r.k) &&
- !memcmp(l.v, r.v, bkey_val_bytes(l.k));
-}
-
struct extents_to_bp_state {
struct bpos bucket_start;
struct bpos bucket_end;
@@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
- struct bkey_buf tmp;
int ret = 0;
- bch2_bkey_buf_init(&tmp);
-
struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
if (!ca) {
prt_str(&buf, "extent for nonexistent device:bucket ");
@@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
- bch2_bkey_buf_reassemble(&tmp, c, orig_k);
-
- if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
- if (bp.level) {
- bch2_trans_unlock(trans);
- bch2_btree_interior_updates_flush(c);
- }
-
- ret = bch2_btree_write_buffer_flush_sync(trans);
- if (ret)
- goto err;
-
- bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
- ret = -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
+ if (ret)
+ goto err;
goto check_existing_bp;
}
@@ -589,7 +566,6 @@ err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
- bch2_bkey_buf_exit(&tmp, c);
bch2_dev_put(ca);
printbuf_exit(&buf);
return ret;
@@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
!((1U << btree) & btree_interior_mask))
continue;
+ bch2_trans_begin(trans);
+
__for_each_btree_node(trans, iter, btree,
btree == start.btree ? start.pos : POS_MIN,
0, depth, BTREE_ITER_prefetch, b, ret) {
@@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
struct bkey_s_c_backpointer bp,
- struct bpos *last_flushed_pos)
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
- if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
- *last_flushed_pos = bp.k->p;
- ret = bch2_btree_write_buffer_flush_sync(trans) ?:
- -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ if (!k.k) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
+ if (ret)
+ goto out;
- if (fsck_err_on(!k.k, c,
- backpointer_to_missing_ptr,
- "backpointer for missing %s\n %s",
- bp.v->level ? "btree node" : "extent",
- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
- goto out;
+ if (fsck_err(c, backpointer_to_missing_ptr,
+ "backpointer for missing %s\n %s",
+ bp.v->level ? "btree node" : "extent",
+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
+ goto out;
+ }
}
out:
fsck_err:
@@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)
{
- struct bpos last_flushed_pos = SPOS_MAX;
+ struct bkey_buf last_flushed;
- return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_one_backpointer(trans, start, end,
bkey_s_c_to_backpointer(k),
- &last_flushed_pos));
+ &last_flushed));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ return ret;
}
int bch2_check_backpointers_to_extents(struct bch_fs *c)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 2992a644d822..1106fec6e155 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -493,6 +493,11 @@ struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
+struct discard_in_flight {
+ bool in_progress:1;
+ u64 bucket:63;
+};
+
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,6 +559,12 @@ struct bch_dev {
size_t inc_gen_really_needs_gc;
size_t buckets_waiting_on_journal;
+ struct work_struct invalidate_work;
+ struct work_struct discard_work;
+ struct mutex discard_buckets_in_flight_lock;
+ DARRAY(struct discard_in_flight) discard_buckets_in_flight;
+ struct work_struct discard_fast_work;
+
atomic64_t rebalance_work;
struct journal_device journal;
@@ -915,11 +926,6 @@ struct bch_fs {
unsigned write_points_nr;
struct buckets_waiting_for_journal buckets_waiting_for_journal;
- struct work_struct invalidate_work;
- struct work_struct discard_work;
- struct mutex discard_buckets_in_flight_lock;
- DARRAY(struct bpos) discard_buckets_in_flight;
- struct work_struct discard_fast_work;
/* GARBAGE COLLECTION */
struct work_struct gc_gens_work;
@@ -1214,6 +1220,11 @@ static inline s64 bch2_current_time(const struct bch_fs *c)
return timespec_to_bch2_time(c, now);
}
+static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw)
+{
+ return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX);
+}
+
static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
{
struct stdio_redirect *stdio = c->stdio;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 90c12fe2a2cd..e3b1bde489c3 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -476,6 +476,9 @@ struct bch_lru {
#define LRU_ID_STRIPES (1U << 16)
+#define LRU_TIME_BITS 48
+#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
+
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@@ -987,8 +990,9 @@ enum bch_version_upgrade_opts {
#define BCH_ERROR_ACTIONS() \
x(continue, 0) \
- x(ro, 1) \
- x(panic, 2)
+ x(fix_safe, 1) \
+ x(panic, 2) \
+ x(ro, 3)
enum bch_error_actions {
#define x(t, n) BCH_ON_ERROR_##t = n,
@@ -1382,9 +1386,10 @@ enum btree_id {
/*
* Maximum number of btrees that we will _ever_ have under the current scheme,
- * where we refer to them with bitfields
+ * where we refer to them with 64 bit bitfields - and we also need a bit for
+ * the interior btree node type:
*/
-#define BTREE_ID_NR_MAX 64
+#define BTREE_ID_NR_MAX 63
static inline bool btree_id_is_alloc(enum btree_id id)
{
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index f46978e5cb7c..587d7318a2e8 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
- u64 packed_max = f->bits_per_field[i]
- ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
+ unsigned packed_bits = min(64, f->bits_per_field[i]);
+ u64 packed_max = packed_bits
+ ? ~((~0ULL << 1) << (packed_bits - 1))
: 0;
prt_printf(err, "field %u too large: %llu + %llu > %llu",
@@ -1064,7 +1065,7 @@ void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
{
const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
u8 *l = k->key_start;
- u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
+ u8 *h = (u8 *) ((u64 *) k->_data + f->key_u64s) - 1;
while (l < h) {
swap(*l, *h);
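
The one-character-looking change in bch2_bkey_swab_key() makes the word-sized stride explicit: the end-of-key byte pointer must land key_u64s 64-bit words past the start, so the addend is applied to a u64 * regardless of how _data is declared. A runnable demo of the general element-size pitfall:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t words[4] = { 0 };
	void *base = words;
	unsigned key_u64s = 3;

	/* Adding to a u8 pointer advances in bytes ... */
	uint8_t *in_bytes = (uint8_t *) base + key_u64s;
	/* ... adding to a u64 pointer advances in 8-byte words. */
	uint8_t *in_words = (uint8_t *) ((uint64_t *) base + key_u64s);

	printf("byte arithmetic: +%td, word arithmetic: +%td\n",
	       in_bytes - (uint8_t *) base, in_words - (uint8_t *) base);
	/* prints: byte arithmetic: +3, word arithmetic: +24 */
	return 0;
}
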
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index fcd43915df07..936357149cf0 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
return bkey_gt(l, r) ? l : r;
}
+static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
+{
+ return bpos_eq(l.k->p, r.k->p) &&
+ bkey_bytes(l.k) == bkey_bytes(r.k) &&
+ !memcmp(l.v, r.v, bkey_val_bytes(l.k));
+}
+
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index c2c3dae52186..bd32aac05192 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -398,8 +398,12 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
for (i = 0; i < nr_compat; i++)
switch (!write ? i : nr_compat - 1 - i) {
case 0:
- if (big_endian != CPU_BIG_ENDIAN)
+ if (big_endian != CPU_BIG_ENDIAN) {
+ bch2_bkey_swab_key(f, k);
+ } else if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
bch2_bkey_swab_key(f, k);
+ bch2_bkey_swab_key(f, k);
+ }
break;
case 1:
if (version < bcachefs_metadata_version_bkey_renumber)
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index 726ef7483763..baef0722f5fb 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -129,7 +129,8 @@ static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
struct bkey_packed *k)
{
if (version < bcachefs_metadata_version_current ||
- big_endian != CPU_BIG_ENDIAN)
+ big_endian != CPU_BIG_ENDIAN ||
+ IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
__bch2_bkey_compat(level, btree_id, version,
big_endian, write, f, k);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0e477a926579..a0deb8266011 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -641,16 +641,30 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
target_depth = 0;
/* root */
- mutex_lock(&c->btree_root_lock);
- struct btree *b = bch2_btree_id_root(c, btree)->b;
- if (!btree_node_fake(b)) {
+ do {
+retry_root:
+ bch2_trans_begin(trans);
+
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
+ 0, bch2_btree_id_root(c, btree)->b->c.level, 0);
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err_root;
+
+ if (b != btree_node_root(c, b)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto retry_root;
+ }
+
gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX));
- ret = lockrestart_do(trans,
- bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
- NULL, NULL, bkey_i_to_s_c(&b->key), initial));
+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
+ ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial);
level = b->c.level;
- }
- mutex_unlock(&c->btree_root_lock);
+err_root:
+ bch2_trans_iter_exit(trans, &iter);
+ } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
if (ret)
return ret;
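
Replacing the btree_root_lock critical section with a real btree iterator means the root walk now participates in the normal transaction-restart protocol: on a restart the loop re-begins the transaction, re-initializes the iterator, and tries again. The surrounding idiom, using only the calls visible in this hunk (do_work() is a hypothetical placeholder for the transactional body):

    int ret;
    do {
            bch2_trans_begin(trans);        /* reset state from the failed attempt */

            ret = do_work(trans);           /* hypothetical transactional body */
    } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
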
@@ -903,6 +917,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
percpu_up_read(&c->mark_lock);
+ gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
+
if (fsck_err_on(new.data_type != gc.data_type, c,
alloc_key_data_type_wrong,
"bucket %llu:%llu gen %u has wrong data_type"
@@ -916,23 +932,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
#define copy_bucket_field(_errtype, _f) \
if (fsck_err_on(new._f != gc._f, c, _errtype, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
- ": got %u, should be %u", \
+ ": got %llu, should be %llu", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_type_str(gc.data_type), \
- new._f, gc._f)) \
+ (u64) new._f, (u64) gc._f)) \
new._f = gc._f; \
- copy_bucket_field(alloc_key_gen_wrong,
- gen);
- copy_bucket_field(alloc_key_dirty_sectors_wrong,
- dirty_sectors);
- copy_bucket_field(alloc_key_cached_sectors_wrong,
- cached_sectors);
- copy_bucket_field(alloc_key_stripe_wrong,
- stripe);
- copy_bucket_field(alloc_key_stripe_redundancy_wrong,
- stripe_redundancy);
+ copy_bucket_field(alloc_key_gen_wrong, gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong, stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
+ copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
#undef copy_bucket_field
if (!bch2_alloc_v4_cmp(*old, new))
@@ -946,7 +958,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
a->v = new;
/*
- * The trigger normally makes sure this is set, but we're not running
+ * The trigger normally makes sure these are set, but we're not running
* triggers:
*/
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 3694c600a3ad..19352a08ea20 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -996,7 +996,7 @@ retry_all:
bch2_trans_unlock(trans);
cond_resched();
- trans->locked = true;
+ trans_set_locked(trans);
if (unlikely(trans->memory_allocation_failure)) {
struct closure cl;
@@ -3089,7 +3089,8 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_srcu_unlock(trans);
trans->last_begin_ip = _RET_IP_;
- trans->locked = true;
+
+ trans_set_locked(trans);
if (trans->restarted) {
bch2_btree_path_traverse_all(trans);
@@ -3130,7 +3131,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
memset(trans, 0, sizeof(*trans));
- closure_init_stack(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@@ -3150,22 +3150,16 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
BUG_ON(pos_task &&
pid == pos_task->pid &&
pos->locked);
-
- if (pos_task && pid < pos_task->pid) {
- list_add_tail(&trans->list, &pos->list);
- goto list_add_done;
- }
}
}
- list_add_tail(&trans->list, &c->btree_trans_list);
-list_add_done:
+
+ list_add(&trans->list, &c->btree_trans_list);
seqmutex_unlock(&c->btree_trans_lock);
got_trans:
trans->c = c;
trans->last_begin_time = local_clock();
trans->fn_idx = fn_idx;
trans->locking_wait.task = current;
- trans->locked = true;
trans->journal_replay_not_finished =
unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)) &&
atomic_inc_not_zero(&c->journal_keys.ref);
@@ -3199,6 +3193,9 @@ got_trans:
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
+ trans_set_locked(trans);
+
+ closure_init_stack_release(&trans->ref);
return trans;
}
@@ -3235,7 +3232,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans_for_each_update(trans, i)
__btree_path_put(trans->paths + i->path, true);
trans->nr_updates = 0;
- trans->locking_wait.task = NULL;
check_btree_paths_leaked(trans);
@@ -3256,6 +3252,13 @@ void bch2_trans_put(struct btree_trans *trans)
if (unlikely(trans->journal_replay_not_finished))
bch2_journal_keys_put(c);
+ /*
+ * trans->ref protects trans->locking_wait.task and the btree_paths
+ * array; both are used by the cycle detector
+ */
+ closure_return_sync(&trans->ref);
+ trans->locking_wait.task = NULL;
+
unsigned long *paths_allocated = trans->paths_allocated;
trans->paths_allocated = NULL;
trans->paths = NULL;
@@ -3273,8 +3276,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
if (trans) {
- closure_sync(&trans->ref);
-
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
@@ -3380,8 +3381,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
if (trans) {
- closure_sync(&trans->ref);
-
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index d66fff22109a..c51826fd557f 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -231,7 +231,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
prt_newline(&buf);
}
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
printbuf_exit(&buf);
BUG();
}
@@ -792,7 +792,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
return bch2_trans_relock_fail(trans, path, &f, trace);
}
- trans->locked = true;
+ trans_set_locked(trans);
out:
bch2_trans_verify_locks(trans);
return 0;
@@ -812,16 +812,14 @@ void bch2_trans_unlock_noassert(struct btree_trans *trans)
{
__bch2_trans_unlock(trans);
- trans->locked = false;
- trans->last_unlock_ip = _RET_IP_;
+ trans_set_unlocked(trans);
}
void bch2_trans_unlock(struct btree_trans *trans)
{
__bch2_trans_unlock(trans);
- trans->locked = false;
- trans->last_unlock_ip = _RET_IP_;
+ trans_set_unlocked(trans);
}
void bch2_trans_unlock_long(struct btree_trans *trans)
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 7f41545b9147..75a6274c7d27 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -193,6 +193,28 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
/* lock: */
+static inline void trans_set_locked(struct btree_trans *trans)
+{
+ if (!trans->locked) {
+ trans->locked = true;
+ trans->last_unlock_ip = 0;
+
+ trans->pf_memalloc_nofs = (current->flags & PF_MEMALLOC_NOFS) != 0;
+ current->flags |= PF_MEMALLOC_NOFS;
+ }
+}
+
+static inline void trans_set_unlocked(struct btree_trans *trans)
+{
+ if (trans->locked) {
+ trans->locked = false;
+ trans->last_unlock_ip = _RET_IP_;
+
+ if (!trans->pf_memalloc_nofs)
+ current->flags &= ~PF_MEMALLOC_NOFS;
+ }
+}
+
static inline int __btree_node_lock_nopath(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type,
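
trans_set_locked() stashes the caller's PF_MEMALLOC_NOFS bit in the new pf_memalloc_nofs field and trans_set_unlocked() restores it, so allocations made while btree locks are held implicitly behave as GFP_NOFS without clobbering an enclosing NOFS section. The stock kernel API for the same save/restore discipline is memalloc_nofs_save()/memalloc_nofs_restore() from <linux/sched/mm.h>; a sketch, assuming a self-contained critical section:

    #include <linux/sched/mm.h>

    static void do_locked_work(void)
    {
            unsigned int flags = memalloc_nofs_save();      /* remember prior state */

            /* ... allocations in here implicitly get GFP_NOFS semantics ... */

            memalloc_nofs_restore(flags);   /* restore rather than blindly clear */
    }

Restoring the saved state, instead of unconditionally clearing the flag, is what lets such sections nest correctly; that is exactly why the patch records the old value rather than doing current->flags &= ~PF_MEMALLOC_NOFS unconditionally.
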
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index d63db4fefe73..48cb1a7d31c5 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -484,6 +484,7 @@ struct btree_trans {
bool lock_may_not_fail:1;
bool srcu_held:1;
bool locked:1;
+ bool pf_memalloc_nofs:1;
bool write_locked:1;
bool used_mempool:1;
bool in_traverse_all:1;
@@ -761,13 +762,13 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
BCH_BTREE_IDS()
#undef x
;
- return (1U << type) & mask;
+ return BIT_ULL(type) & mask;
}
static inline bool btree_id_is_extents(enum btree_id btree)
@@ -777,35 +778,35 @@ static inline bool btree_id_is_extents(enum btree_id btree)
static inline bool btree_type_has_snapshots(enum btree_id id)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(id) & mask;
}
static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(id) & mask;
}
static inline bool btree_type_has_ptrs(enum btree_id id)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(id) & mask;
}
struct btree_root {
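
These four mask conversions are all the same fix: once btree ids or node types can grow past 31, building the mask in a 32-bit unsigned constant and testing it with 1U << n is undefined behaviour for n >= 32. Widening the mask to u64 and testing with BIT_ULL(n) (which is just 1ULL << n) keeps the x-macro construction but stays correct for up to 64 entries. The hazard in miniature:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned n = 33;

            /* uint32_t bad = 1U << n;     undefined: shift >= width of int */
            uint64_t good = 1ULL << n;  /* well-defined for n up to 63 */

            printf("%llx\n", (unsigned long long) good);
            return 0;
    }
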
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 75c8a196b3f6..d0e92d948002 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
+#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
return ret;
}
+/*
+ * In check and repair code, when checking references to write buffer btrees we
+ * need to issue a flush before we have a definitive error: this issues a flush
+ * if this is a key we haven't yet checked.
+ */
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf tmp;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
+ bch2_bkey_buf_reassemble(&tmp, c, referring_k);
+
+ if (bkey_is_btree_ptr(referring_k.k)) {
+ bch2_trans_unlock(trans);
+ bch2_btree_interior_updates_flush(c);
+ }
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+ ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+ }
+err:
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
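
The last_flushed buffer is the memoization that keeps this from looping forever: after a flush the helper records the referring key and returns -BCH_ERR_transaction_restart_write_buffer_flush, the caller restarts and re-checks, and on the second pass (when referring_k equals *last_flushed) the helper does nothing, letting the caller treat the inconsistency as definitive. A schematic caller (reference_is_missing and report_and_repair() are hypothetical; last_flushed is a bkey_buf initialized earlier):

    if (reference_is_missing) {
            ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k,
                                                      &last_flushed);
            if (ret)
                    return ret;     /* includes the flush-triggered restart */

            /* still missing after the flush: now a real error */
            report_and_repair();
    }

bch2_lru_check_set(), added later in this diff, follows exactly this shape.
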
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index eebcd2b15249..dd5e64218b50 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
+struct bkey_buf;
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
+
struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;
size_t room;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 743d57eba760..314ee3e0187f 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -805,7 +805,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
"bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
- *bucket_gen(ca, bucket_nr),
+ bucket_gen_get(ca, bucket_nr),
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen,
(printbuf_reset(&buf),
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 80ee0be9793e..8ad4be73860c 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -116,6 +116,14 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
return gens->b + b;
}
+static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b)
+{
+ rcu_read_lock();
+ u8 gen = *bucket_gen(ca, b);
+ rcu_read_unlock();
+ return gen;
+}
+
static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
const struct bch_extent_ptr *ptr)
{
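
bucket_gen_get() wraps the raw bucket_gen() lookup in an RCU read-side section and copies the gen out by value, so callers that only want a point-in-time snapshot no longer need their own rcu_read_lock() (the bare-pointer dereference at the call site in buckets.c above was the bug). The accessor shape, for a hypothetical RCU-managed table:

    #include <linux/rcupdate.h>

    struct my_table { u8 gens[1024]; };     /* hypothetical */

    static u8 gen_get(struct my_table __rcu *tbl, size_t i)
    {
            rcu_read_lock();
            u8 gen = rcu_dereference(tbl)->gens[i]; /* pointer valid only here */
            rcu_read_unlock();
            return gen;     /* the copied value outlives the RCU section */
    }
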
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 9e54323f0f5f..6d82e1165adc 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -216,7 +216,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(NULL, &thr->opts, optstr);
- kfree(optstr);
+ if (!IS_ERR(optstr))
+ kfree(optstr);
if (ret)
goto err;
@@ -319,7 +320,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
return ret;
ret = bch2_dev_add(c, path);
- kfree(path);
+ if (!IS_ERR(path))
+ kfree(path);
return ret;
}
@@ -850,7 +852,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(c, &thr->opts, optstr);
- kfree(optstr);
+ if (!IS_ERR(optstr))
+ kfree(optstr);
if (ret)
goto err;
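
All three chardev hunks fix the same confusion: strndup_user() returns an ERR_PTR() on failure, and unlike kfree(NULL), kfree() on an ERR_PTR is an invalid free. Guarding with IS_ERR() is the minimal fix; an equivalent restructuring that avoids the conditional free altogether (consume() is a hypothetical user of the string):

    char *optstr = strndup_user(ubuf, len);
    int ret = PTR_ERR_OR_ZERO(optstr);
    if (ret)
            return ret;             /* nothing was allocated */

    ret = consume(optstr);
    kfree(optstr);                  /* only reached with a real allocation */
    return ret;
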
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index 363644451106..0f40b585ce2b 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
{
struct io_timer *ret = NULL;
- spin_lock(&clock->timer_lock);
-
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
-
- spin_unlock(&clock->timer_lock);
-
return ret;
}
@@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
struct io_timer *timer;
unsigned long now = atomic64_add_return(sectors, &clock->now);
+ spin_lock(&clock->timer_lock);
while ((timer = get_expired_timer(clock, now)))
timer->fn(timer);
+ spin_unlock(&clock->timer_lock);
}
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
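
The lock moves up a level: get_expired_timer() now assumes timer_lock is held, and __bch2_increment_clock() holds it across the whole drain, so the expiry check, the heap_pop(), and the timer->fn() callbacks all run under a single acquisition instead of the lock being dropped between pop and callback. A userspace sketch of the drain-under-one-lock shape (pop_expired() and run() are hypothetical):

    #include <pthread.h>

    struct item;
    extern struct item *pop_expired(unsigned long now); /* caller holds lock */
    extern void run(struct item *);

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    void drain(unsigned long now)
    {
            struct item *it;

            pthread_mutex_lock(&lock);
            while ((it = pop_expired(now)))
                    run(it);        /* callback runs with the lock held */
            pthread_mutex_unlock(&lock);
    }
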
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 1a0072eef109..0087b8555ead 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -5,7 +5,9 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
+#include "compress.h"
#include "data_update.h"
+#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
+void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ printbuf_tabstop_push(out, 20);
+ prt_str(out, "rewrite ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "kill ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->kill_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "target:\t");
+ bch2_target_to_text(out, c, data_opts->target);
+ prt_newline(out);
+
+ prt_str(out, "compression:\t");
+ bch2_compression_opt_to_text(out, background_compression(*io_opts));
+ prt_newline(out);
+
+ prt_str(out, "extra replicas:\t");
+ prt_u64(out, data_opts->extra_replicas);
+}
+
+void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
+{
+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
+ prt_newline(out);
+ bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
+}
+
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans,
if (!(durability_have + durability_removing))
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+ if (!m->op.nr_replicas) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_data_update_to_text(&buf, m);
+ WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_data_update_done;
+ goto done;
+ }
+
m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 991095bbd469..8d36365bdea8 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -17,6 +17,9 @@ struct data_update_opts {
unsigned write_flags;
};
+void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_opts *, struct data_update_opts *);
+
struct data_update {
/* extent being updated: */
enum btree_id btree_id;
@@ -27,6 +30,8 @@ struct data_update {
struct bch_write_op op;
};
+void bch2_data_update_to_text(struct printbuf *, struct data_update *);
+
int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 51cbf3928361..ebabab171fe5 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};
+typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);
+
+static void list_sort(struct list_head *head, list_cmp_fn cmp)
+{
+ struct list_head *pos;
+
+ list_for_each(pos, head)
+ while (!list_is_last(pos, head) &&
+ cmp(pos, pos->next) > 0) {
+ struct list_head *pos2, *next = pos->next;
+
+ list_del(next);
+ list_for_each(pos2, head)
+ if (cmp(next, pos2) < 0)
+ goto pos_found;
+ BUG();
+pos_found:
+ list_add_tail(next, pos2);
+ }
+}
+
+static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
+{
+ return cmp_int(l, r);
+}
+
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@@ -575,41 +601,39 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
struct bch_fs *c = i->c;
struct btree_trans *trans;
ssize_t ret = 0;
- u32 seq;
i->ubuf = buf;
i->size = size;
i->ret = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= i->iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans <= i->iter)
continue;
- closure_get(&trans->ref);
- seq = seqmutex_seq(&c->btree_trans_lock);
- seqmutex_unlock(&c->btree_trans_lock);
+ i->iter = (ulong) trans;
- ret = flush_buf(i);
- if (ret) {
- closure_put(&trans->ref);
- goto unlocked;
- }
+ if (!closure_get_not_zero(&trans->ref))
+ continue;
+
+ u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
prt_printf(&i->buf, "backtrace:\n");
printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
- i->iter = task->pid;
-
closure_put(&trans->ref);
+ ret = flush_buf(i);
+ if (ret)
+ goto unlocked;
+
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
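
Sorting the transaction list by object address gives a stable total order, so the pointer itself becomes the resume cursor: after dropping the seqmutex to flush output, the walk re-sorts and skips everything at or below the saved address, instead of keying off PIDs as the old code did (PIDs can vanish or repeat while the lock is dropped). The cursor idiom in standalone form:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int cmp_addr(const void *a, const void *b)
    {
            uintptr_t l = (uintptr_t) *(int * const *) a;
            uintptr_t r = (uintptr_t) *(int * const *) b;
            return (l > r) - (l < r);
    }

    int main(void)
    {
            int a, b, c;
            int *objs[] = { &a, &b, &c };
            uintptr_t cursor = 0;   /* survives a restart of the walk */

            qsort(objs, 3, sizeof(objs[0]), cmp_addr);      /* stable order */
            for (int i = 0; i < 3; i++) {
                    if ((uintptr_t) objs[i] <= cursor)
                            continue;       /* visited before a restart */
                    cursor = (uintptr_t) objs[i];
                    printf("visit %p\n", (void *) objs[i]);
            }
            return 0;
    }
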
@@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = {
.read = btree_transaction_stats_read,
};
-static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
+/* walk btree transactions until we find a deadlock and print it */
+static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
- struct dump_iter *i = file->private_data;
- struct bch_fs *c = i->c;
struct btree_trans *trans;
- ssize_t ret = 0;
- u32 seq;
-
- i->ubuf = buf;
- i->size = size;
- i->ret = 0;
-
- if (i->iter)
- goto out;
+ ulong iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= i->iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans <= iter)
continue;
- closure_get(&trans->ref);
- seq = seqmutex_seq(&c->btree_trans_lock);
- seqmutex_unlock(&c->btree_trans_lock);
+ iter = (ulong) trans;
- ret = flush_buf(i);
- if (ret) {
- closure_put(&trans->ref);
- goto out;
- }
+ if (!closure_get_not_zero(&trans->ref))
+ continue;
- bch2_check_for_deadlock(trans, &i->buf);
+ u32 seq = seqmutex_unlock(&c->btree_trans_lock);
- i->iter = task->pid;
+ bool found = bch2_check_for_deadlock(trans, out) != 0;
closure_put(&trans->ref);
+ if (found)
+ return;
+
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
seqmutex_unlock(&c->btree_trans_lock);
-out:
+}
+
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ ssize_t ret = 0;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ btree_deadlock_to_text(&i->buf, c);
+ i->iter++;
+ }
+
if (i->buf.allocation_failure)
ret = -ENOMEM;
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index dbe35b80bc0b..58612abf7927 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -116,6 +116,9 @@
x(ENOENT, ENOENT_dev_idx_not_found) \
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
+ x(EEXIST, EEXIST_str_hash_set) \
+ x(EEXIST, EEXIST_discard_in_flight_add) \
+ x(EEXIST, EEXIST_subvolume_create) \
x(0, open_buckets_empty) \
x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index c66eeffcd7f2..d95c40f1b6af 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
switch (c->opts.errors) {
case BCH_ON_ERROR_continue:
return false;
+ case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
@@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
prt_str(out, "ing");
}
+static const u8 fsck_flags_extra[] = {
+#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
+ BCH_SB_ERRS()
+#undef x
+};
+
int bch2_fsck_err(struct bch_fs *c,
enum bch_fsck_flags flags,
enum bch_sb_error_id err,
@@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c,
int ret = -BCH_ERR_fsck_ignore;
const char *action_orig = "fix?", *action = action_orig;
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ flags |= fsck_flags_extra[err];
+
if ((flags & FSCK_CAN_FIX) &&
test_bit(err, c->sb.errors_silent))
return -BCH_ERR_fsck_fix;
@@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
- if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if ((flags & FSCK_CAN_FIX) &&
+ (flags & FSCK_AUTOFIX) &&
+ (c->opts.errors == BCH_ON_ERROR_continue ||
+ c->opts.errors == BCH_ON_ERROR_fix_safe)) {
+ prt_str(out, ", ");
+ prt_actioning(out, action);
+ ret = -BCH_ERR_fsck_fix;
+ } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 36caedf72d89..777711504c35 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -108,13 +108,6 @@ struct fsck_err_state {
char *last_msg;
};
-enum bch_fsck_flags {
- FSCK_CAN_FIX = 1 << 0,
- FSCK_CAN_IGNORE = 1 << 1,
- FSCK_NEED_FSCK = 1 << 2,
- FSCK_NO_RATELIMIT = 1 << 3,
-};
-
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
__printf(4, 5) __cold
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index 24840aee335c..795f4fc0bab1 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i)
static inline unsigned eytzinger1_first(unsigned size)
{
- return rounddown_pow_of_two(size);
+ return size ? rounddown_pow_of_two(size) : 0;
}
static inline unsigned eytzinger1_last(unsigned size)
@@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
static inline unsigned eytzinger1_extra(unsigned size)
{
- return (size + 1 - rounddown_pow_of_two(size)) << 1;
+ return size
+ ? (size + 1 - rounddown_pow_of_two(size)) << 1
+ : 0;
}
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
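
Both eytzinger guards exist because rounddown_pow_of_two(0) is undefined: the kernel implementation reduces to 1UL << (fls_long(0) - 1), i.e. a shift by -1. eytzinger1_first() and eytzinger1_extra() now short-circuit size == 0, and the blacklist GC loop below adds the same check at its call site. A userspace model of the hazard (assuming 64-bit unsigned long):

    #include <stdio.h>

    /* Model of the kernel's rounddown_pow_of_two(); only valid for n > 0. */
    static unsigned long rounddown_pow2(unsigned long n)
    {
            return 1UL << (63 - __builtin_clzl(n)); /* clzl(0) is undefined */
    }

    int main(void)
    {
            printf("%lu\n", rounddown_pow2(5));     /* prints 4 */
            /* rounddown_pow2(0) would invoke undefined behaviour */
            return 0;
    }
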
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 3551a737181b..79a0c8732bce 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -373,7 +373,7 @@ retry:
}
if (dst_dentry->d_inode) {
- error = -EEXIST;
+ error = -BCH_ERR_EEXIST_subvolume_create;
goto err3;
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 77126992dba8..0c7d1bc0548a 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -188,6 +188,18 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
+ /*
+ * bcachefs doesn't use I_NEW; we have no use for it since we
+ * only insert fully created inodes in the inode hash table. But
+ * discard_new_inode() expects it to be set...
+ */
+ inode->v.i_flags |= I_NEW;
+ /*
+ * We don't want bch2_evict_inode() to delete the inode on disk,
+ * we just raced and had another inode in cache. Normally new
+ * inodes don't have nlink == 0 - except tmpfiles do...
+ */
+ set_nlink(&inode->v, 1);
discard_new_inode(&inode->v);
inode = old;
} else {
@@ -195,8 +207,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
- * we really don't want insert_inode_locked2() to be setting
- * I_NEW...
+ * Again, I_NEW makes no sense for bcachefs. unlock_new_inode()
+ * is called here only to clear I_NEW: the inode was already
+ * fully created and initialized, so we never wanted
+ * inode_insert5() to set it for us in the first place.
*/
unlock_new_inode(&inode->v);
}
@@ -230,7 +244,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
inode->ei_flags = 0;
mutex_init(&inode->ei_quota_lock);
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
- inode->v.i_state = 0;
if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
kmem_cache_free(bch2_inode_cache, inode);
@@ -1157,6 +1170,7 @@ static const struct file_operations bch_file_operations = {
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
+ .get_unmapped_area = thp_get_unmapped_area,
.fsync = bch2_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
@@ -1488,11 +1502,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
bch2_iget5_set(&inode->v, &inum);
bch2_inode_update_after_write(trans, inode, bi, ~0);
- if (BCH_SUBVOLUME_SNAP(subvol))
- set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
- else
- clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
-
inode->v.i_blocks = bi->bi_sectors;
inode->v.i_ino = bi->bi_inum;
inode->v.i_rdev = bi->bi_dev;
@@ -1504,6 +1513,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;
+ if (BCH_SUBVOLUME_SNAP(subvol))
+ set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
+
inode->v.i_mapping->a_ops = &bch_address_space_operations;
switch (inode->v.i_mode & S_IFMT) {
@@ -2019,6 +2031,8 @@ err_put_super:
__bch2_fs_stop(c);
deactivate_locked_super(sb);
err:
+ if (ret)
+ pr_err("error: %s", bch2_err_str(ret));
/*
* On an inconsistency error in recovery we might see an -EROFS derived
* errorcode (from the journal), but we don't want to return that to
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 4ec979b4b23e..4583c9386e8c 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -125,7 +125,7 @@ err_noprint:
bch2_bkey_buf_exit(&old, c);
if (closure_nr_remaining(&cl) != 1) {
- bch2_trans_unlock(trans);
+ bch2_trans_unlock_long(trans);
closure_sync(&cl);
}
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index c97fa7002b06..ebf39ef72fb2 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -389,7 +389,6 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(trans);
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
@@ -1004,6 +1003,9 @@ get_bio:
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
+ if (flags & BCH_READ_NODECODE)
+ orig->pick = pick;
+
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index adec8e1ea73e..10b19791ec98 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1095,7 +1095,7 @@ unlock:
return ret;
}
-int bch2_dev_journal_alloc(struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
unsigned nr;
int ret;
@@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
+ ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
err:
bch_err_fn(ca, ret);
return ret;
@@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
if (ca->journal.nr)
continue;
- int ret = bch2_dev_journal_alloc(ca);
+ int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
@@ -1167,6 +1167,9 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
void bch2_fs_journal_stop(struct journal *j)
{
+ if (!test_bit(JOURNAL_running, &j->flags))
+ return;
+
bch2_journal_reclaim_stop(j);
bch2_journal_flush_all_pins(j);
@@ -1181,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j)
journal_quiesce(j);
cancel_delayed_work_sync(&j->write_work);
- BUG_ON(!bch2_journal_error(j) &&
- test_bit(JOURNAL_replay_done, &j->flags) &&
- j->last_empty_seq != journal_cur_seq(j));
+ WARN(!bch2_journal_error(j) &&
+ test_bit(JOURNAL_replay_done, &j->flags) &&
+ j->last_empty_seq != journal_cur_seq(j),
+ "journal shutdown error: cur seq %llu but last empty seq %llu",
+ journal_cur_seq(j), j->last_empty_seq);
if (!bch2_journal_error(j))
clear_bit(JOURNAL_running, &j->flags);
@@ -1415,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
unsigned long now = jiffies;
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
- if (!out->nr_tabstops)
- printbuf_tabstop_push(out, 28);
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 28);
out->atomic++;
rcu_read_lock();
@@ -1518,6 +1523,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
struct journal_entry_pin *pin;
spin_lock(&j->lock);
+ if (!test_bit(JOURNAL_running, &j->flags)) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index fd1f7cdaa8bc..bc6b9c39dcb4 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
-int bch2_dev_journal_alloc(struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *, bool);
int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index cdcb1ad49af4..2326e2cb9cd2 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
flags|BCH_VALIDATE_journal);
if (ret == FSCK_DELETED_KEY)
continue;
+ else if (ret)
+ return ret;
k = bkey_next(k);
}
@@ -1677,6 +1679,13 @@ static CLOSURE_CALLBACK(journal_write_done)
mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
}
+ /*
+ * We don't typically trigger journal writes from here - the next journal
+ * write will be triggered immediately after the previous one is
+ * allocated, in bch2_journal_write() - but the journal write error path
+ * is special:
+ */
+ bch2_journal_do_writes(j);
spin_unlock(&j->lock);
}
@@ -1755,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush)
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
spin_lock(&j->lock);
- closure_wait(&j->async_wait, cl);
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
spin_unlock(&j->lock);
-
- continue_at(cl, journal_write_preflush, j->wq);
- return;
}
if (w->separate_flush) {
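
The preflush change is the standard check / lock / re-check pattern: the first, unlocked test of seq_ondisk is only a hint, so it is re-evaluated under j->lock before committing to closure_wait(); without the recheck, the prior write could complete between the test and the wait, leaving the closure parked with nobody left to wake it. The same shape in userspace (done stands in for the journal-sequence condition):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool done;

    void wait_until_done(void)
    {
            if (!done) {                    /* unlocked: cheap, racy hint */
                    pthread_mutex_lock(&lock);
                    while (!done)           /* authoritative recheck */
                            pthread_cond_wait(&cond, &lock);
                    pthread_mutex_unlock(&lock);
            }
    }
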
@@ -1967,7 +1978,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
- struct printbuf journal_debug_buf = PRINTBUF;
unsigned nr_rw_members = 0;
int ret;
@@ -2011,11 +2021,15 @@ CLOSURE_CALLBACK(bch2_journal_write)
}
if (ret) {
- __bch2_journal_debug_to_text(&journal_debug_buf, j);
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+
+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write: %s"),
+ bch2_err_str(ret));
+ __bch2_journal_debug_to_text(&buf, j);
spin_unlock(&j->lock);
- bch_err(c, "Unable to allocate journal write:\n%s",
- journal_debug_buf.buf);
- printbuf_exit(&journal_debug_buf);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
goto err;
}
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index ed4846709611..1f25c111c54c 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
BUG_ON(nr != t->nr);
unsigned i;
- for (src = bl->start, i = eytzinger0_first(t->nr);
+ for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
src < bl->start + nr;
src++, i = eytzinger0_next(i, nr)) {
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index a40d116224ed..b12894ef44f3 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = {
NULL
};
+int bch2_lru_check_set(struct btree_trans *trans,
+ u16 lru_id, u64 time,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter lru_iter;
+ struct bkey_s_c lru_k =
+ bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
+ lru_pos(lru_id,
+ bucket_to_u64(referring_k.k->p),
+ time), 0);
+ int ret = bkey_err(lru_k);
+ if (ret)
+ return ret;
+
+ if (lru_k.k->type != KEY_TYPE_set) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed);
+ if (ret)
+ goto err;
+
+ if (fsck_err(c, alloc_key_to_missing_lru_entry,
+ "missing %s lru entry\n"
+ " %s",
+ bch2_lru_types[lru_type(lru_k)],
+ (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
+ ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
+ if (ret)
+ goto err;
+ }
+ }
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &lru_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index fb11ab0dd00e..ed75bcf59d47 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -2,9 +2,6 @@
#ifndef _BCACHEFS_LRU_H
#define _BCACHEFS_LRU_H
-#define LRU_TIME_BITS 48
-#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
-
static inline u64 lru_pos_id(struct bpos pos)
{
return pos.inode >> LRU_TIME_BITS;
@@ -64,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
+struct bkey_buf;
+int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
+
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 6e477fadaa2a..e714e3bd5bbb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = {
NULL
};
-static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
- struct bch_io_opts *io_opts,
- struct data_update_opts *data_opts)
-{
- printbuf_tabstop_push(out, 20);
- prt_str(out, "rewrite ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
- prt_newline(out);
-
- prt_str(out, "kill ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->kill_ptrs);
- prt_newline(out);
-
- prt_str(out, "target:\t");
- bch2_target_to_text(out, c, data_opts->target);
- prt_newline(out);
-
- prt_str(out, "compression:\t");
- bch2_compression_opt_to_text(out, background_compression(*io_opts));
- prt_newline(out);
-
- prt_str(out, "extra replicas:\t");
- prt_u64(out, data_opts->extra_replicas);
-}
-
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 25530e0bb2f3..b197ec90d4cb 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -137,7 +137,7 @@ enum fsck_err_opts {
x(errors, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_error_actions), \
- BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
NULL, "Action to take on filesystem error") \
x(metadata_replicas, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index cf513fc79ce4..1f9d044ed920 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -326,6 +326,12 @@ static int journal_replay_entry_early(struct bch_fs *c,
case BCH_JSET_ENTRY_btree_root: {
struct btree_root *r;
+ if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX,
+ c, invalid_btree_id,
+ "invalid btree id %u (max %u)",
+ entry->btree_id, BTREE_ID_NR_MAX))
+ return 0;
+
while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) {
ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL });
if (ret)
@@ -415,7 +421,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
}
}
-
+fsck_err:
return ret;
}
@@ -658,10 +664,10 @@ int bch2_fs_recovery(struct bch_fs *c)
if (check_version_upgrade(c))
write_sb = true;
+ c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
if (write_sb)
bch2_write_super(c);
-
- c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
mutex_unlock(&c->sb_lock);
if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 3fb23e399ffb..4710b61631f0 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -228,7 +228,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
dst = (void *) &darray_top(table);
dst->version = cpu_to_le16(src->version);
- dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
+ dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
dst->recovery_passes[1] = 0;
dst->nr_errors = cpu_to_le16(src->nr_errors);
for (unsigned i = 0; i < src->nr_errors; i++)
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index bda33e59e226..c1270d790e43 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -110,19 +110,25 @@ out:
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
bch_sb_errors_cpu *src = &c->fsck_error_counts;
- struct bch_sb_field_errors *dst =
- bch2_sb_field_resize(&c->disk_sb, errors,
- bch2_sb_field_errors_u64s(src->nr));
+ struct bch_sb_field_errors *dst;
unsigned i;
+ mutex_lock(&c->fsck_error_counts_lock);
+
+ dst = bch2_sb_field_resize(&c->disk_sb, errors,
+ bch2_sb_field_errors_u64s(src->nr));
+
if (!dst)
- return;
+ goto err;
for (i = 0; i < src->nr; i++) {
SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
}
+
+err:
+ mutex_unlock(&c->fsck_error_counts_lock);
}
static int bch2_sb_errors_to_cpu(struct bch_fs *c)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 84d2763bd597..d54121ec093f 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -2,281 +2,295 @@
#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
#define _BCACHEFS_SB_ERRORS_FORMAT_H
-#define BCH_SB_ERRS() \
- x(clean_but_journal_not_empty, 0) \
- x(dirty_but_no_journal_entries, 1) \
- x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
- x(sb_clean_journal_seq_mismatch, 3) \
- x(sb_clean_btree_root_mismatch, 4) \
- x(sb_clean_missing, 5) \
- x(jset_unsupported_version, 6) \
- x(jset_unknown_csum, 7) \
- x(jset_last_seq_newer_than_seq, 8) \
- x(jset_past_bucket_end, 9) \
- x(jset_seq_blacklisted, 10) \
- x(journal_entries_missing, 11) \
- x(journal_entry_replicas_not_marked, 12) \
- x(journal_entry_past_jset_end, 13) \
- x(journal_entry_replicas_data_mismatch, 14) \
- x(journal_entry_bkey_u64s_0, 15) \
- x(journal_entry_bkey_past_end, 16) \
- x(journal_entry_bkey_bad_format, 17) \
- x(journal_entry_bkey_invalid, 18) \
- x(journal_entry_btree_root_bad_size, 19) \
- x(journal_entry_blacklist_bad_size, 20) \
- x(journal_entry_blacklist_v2_bad_size, 21) \
- x(journal_entry_blacklist_v2_start_past_end, 22) \
- x(journal_entry_usage_bad_size, 23) \
- x(journal_entry_data_usage_bad_size, 24) \
- x(journal_entry_clock_bad_size, 25) \
- x(journal_entry_clock_bad_rw, 26) \
- x(journal_entry_dev_usage_bad_size, 27) \
- x(journal_entry_dev_usage_bad_dev, 28) \
- x(journal_entry_dev_usage_bad_pad, 29) \
- x(btree_node_unreadable, 30) \
- x(btree_node_fault_injected, 31) \
- x(btree_node_bad_magic, 32) \
- x(btree_node_bad_seq, 33) \
- x(btree_node_unsupported_version, 34) \
- x(btree_node_bset_older_than_sb_min, 35) \
- x(btree_node_bset_newer_than_sb, 36) \
- x(btree_node_data_missing, 37) \
- x(btree_node_bset_after_end, 38) \
- x(btree_node_replicas_sectors_written_mismatch, 39) \
- x(btree_node_replicas_data_mismatch, 40) \
- x(bset_unknown_csum, 41) \
- x(bset_bad_csum, 42) \
- x(bset_past_end_of_btree_node, 43) \
- x(bset_wrong_sector_offset, 44) \
- x(bset_empty, 45) \
- x(bset_bad_seq, 46) \
- x(bset_blacklisted_journal_seq, 47) \
- x(first_bset_blacklisted_journal_seq, 48) \
- x(btree_node_bad_btree, 49) \
- x(btree_node_bad_level, 50) \
- x(btree_node_bad_min_key, 51) \
- x(btree_node_bad_max_key, 52) \
- x(btree_node_bad_format, 53) \
- x(btree_node_bkey_past_bset_end, 54) \
- x(btree_node_bkey_bad_format, 55) \
- x(btree_node_bad_bkey, 56) \
- x(btree_node_bkey_out_of_order, 57) \
- x(btree_root_bkey_invalid, 58) \
- x(btree_root_read_error, 59) \
- x(btree_root_bad_min_key, 60) \
- x(btree_root_bad_max_key, 61) \
- x(btree_node_read_error, 62) \
- x(btree_node_topology_bad_min_key, 63) \
- x(btree_node_topology_bad_max_key, 64) \
- x(btree_node_topology_overwritten_by_prev_node, 65) \
- x(btree_node_topology_overwritten_by_next_node, 66) \
- x(btree_node_topology_interior_node_empty, 67) \
- x(fs_usage_hidden_wrong, 68) \
- x(fs_usage_btree_wrong, 69) \
- x(fs_usage_data_wrong, 70) \
- x(fs_usage_cached_wrong, 71) \
- x(fs_usage_reserved_wrong, 72) \
- x(fs_usage_persistent_reserved_wrong, 73) \
- x(fs_usage_nr_inodes_wrong, 74) \
- x(fs_usage_replicas_wrong, 75) \
- x(dev_usage_buckets_wrong, 76) \
- x(dev_usage_sectors_wrong, 77) \
- x(dev_usage_fragmented_wrong, 78) \
- x(dev_usage_buckets_ec_wrong, 79) \
- x(bkey_version_in_future, 80) \
- x(bkey_u64s_too_small, 81) \
- x(bkey_invalid_type_for_btree, 82) \
- x(bkey_extent_size_zero, 83) \
- x(bkey_extent_size_greater_than_offset, 84) \
- x(bkey_size_nonzero, 85) \
- x(bkey_snapshot_nonzero, 86) \
- x(bkey_snapshot_zero, 87) \
- x(bkey_at_pos_max, 88) \
- x(bkey_before_start_of_btree_node, 89) \
- x(bkey_after_end_of_btree_node, 90) \
- x(bkey_val_size_nonzero, 91) \
- x(bkey_val_size_too_small, 92) \
- x(alloc_v1_val_size_bad, 93) \
- x(alloc_v2_unpack_error, 94) \
- x(alloc_v3_unpack_error, 95) \
- x(alloc_v4_val_size_bad, 96) \
- x(alloc_v4_backpointers_start_bad, 97) \
- x(alloc_key_data_type_bad, 98) \
- x(alloc_key_empty_but_have_data, 99) \
- x(alloc_key_dirty_sectors_0, 100) \
- x(alloc_key_data_type_inconsistency, 101) \
- x(alloc_key_to_missing_dev_bucket, 102) \
- x(alloc_key_cached_inconsistency, 103) \
- x(alloc_key_cached_but_read_time_zero, 104) \
- x(alloc_key_to_missing_lru_entry, 105) \
- x(alloc_key_data_type_wrong, 106) \
- x(alloc_key_gen_wrong, 107) \
- x(alloc_key_dirty_sectors_wrong, 108) \
- x(alloc_key_cached_sectors_wrong, 109) \
- x(alloc_key_stripe_wrong, 110) \
- x(alloc_key_stripe_redundancy_wrong, 111) \
- x(bucket_sector_count_overflow, 112) \
- x(bucket_metadata_type_mismatch, 113) \
- x(need_discard_key_wrong, 114) \
- x(freespace_key_wrong, 115) \
- x(freespace_hole_missing, 116) \
- x(bucket_gens_val_size_bad, 117) \
- x(bucket_gens_key_wrong, 118) \
- x(bucket_gens_hole_wrong, 119) \
- x(bucket_gens_to_invalid_dev, 120) \
- x(bucket_gens_to_invalid_buckets, 121) \
- x(bucket_gens_nonzero_for_invalid_buckets, 122) \
- x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
- x(need_discard_freespace_key_bad, 124) \
- x(backpointer_bucket_offset_wrong, 125) \
- x(backpointer_to_missing_device, 126) \
- x(backpointer_to_missing_alloc, 127) \
- x(backpointer_to_missing_ptr, 128) \
- x(lru_entry_at_time_0, 129) \
- x(lru_entry_to_invalid_bucket, 130) \
- x(lru_entry_bad, 131) \
- x(btree_ptr_val_too_big, 132) \
- x(btree_ptr_v2_val_too_big, 133) \
- x(btree_ptr_has_non_ptr, 134) \
- x(extent_ptrs_invalid_entry, 135) \
- x(extent_ptrs_no_ptrs, 136) \
- x(extent_ptrs_too_many_ptrs, 137) \
- x(extent_ptrs_redundant_crc, 138) \
- x(extent_ptrs_redundant_stripe, 139) \
- x(extent_ptrs_unwritten, 140) \
- x(extent_ptrs_written_and_unwritten, 141) \
- x(ptr_to_invalid_device, 142) \
- x(ptr_to_duplicate_device, 143) \
- x(ptr_after_last_bucket, 144) \
- x(ptr_before_first_bucket, 145) \
- x(ptr_spans_multiple_buckets, 146) \
- x(ptr_to_missing_backpointer, 147) \
- x(ptr_to_missing_alloc_key, 148) \
- x(ptr_to_missing_replicas_entry, 149) \
- x(ptr_to_missing_stripe, 150) \
- x(ptr_to_incorrect_stripe, 151) \
- x(ptr_gen_newer_than_bucket_gen, 152) \
- x(ptr_too_stale, 153) \
- x(stale_dirty_ptr, 154) \
- x(ptr_bucket_data_type_mismatch, 155) \
- x(ptr_cached_and_erasure_coded, 156) \
- x(ptr_crc_uncompressed_size_too_small, 157) \
- x(ptr_crc_csum_type_unknown, 158) \
- x(ptr_crc_compression_type_unknown, 159) \
- x(ptr_crc_redundant, 160) \
- x(ptr_crc_uncompressed_size_too_big, 161) \
- x(ptr_crc_nonce_mismatch, 162) \
- x(ptr_stripe_redundant, 163) \
- x(reservation_key_nr_replicas_invalid, 164) \
- x(reflink_v_refcount_wrong, 165) \
- x(reflink_p_to_missing_reflink_v, 166) \
- x(stripe_pos_bad, 167) \
- x(stripe_val_size_bad, 168) \
- x(stripe_sector_count_wrong, 169) \
- x(snapshot_tree_pos_bad, 170) \
- x(snapshot_tree_to_missing_snapshot, 171) \
- x(snapshot_tree_to_missing_subvol, 172) \
- x(snapshot_tree_to_wrong_subvol, 173) \
- x(snapshot_tree_to_snapshot_subvol, 174) \
- x(snapshot_pos_bad, 175) \
- x(snapshot_parent_bad, 176) \
- x(snapshot_children_not_normalized, 177) \
- x(snapshot_child_duplicate, 178) \
- x(snapshot_child_bad, 179) \
- x(snapshot_skiplist_not_normalized, 180) \
- x(snapshot_skiplist_bad, 181) \
- x(snapshot_should_not_have_subvol, 182) \
- x(snapshot_to_bad_snapshot_tree, 183) \
- x(snapshot_bad_depth, 184) \
- x(snapshot_bad_skiplist, 185) \
- x(subvol_pos_bad, 186) \
- x(subvol_not_master_and_not_snapshot, 187) \
- x(subvol_to_missing_root, 188) \
- x(subvol_root_wrong_bi_subvol, 189) \
- x(bkey_in_missing_snapshot, 190) \
- x(inode_pos_inode_nonzero, 191) \
- x(inode_pos_blockdev_range, 192) \
- x(inode_unpack_error, 193) \
- x(inode_str_hash_invalid, 194) \
- x(inode_v3_fields_start_bad, 195) \
- x(inode_snapshot_mismatch, 196) \
- x(inode_unlinked_but_clean, 197) \
- x(inode_unlinked_but_nlink_nonzero, 198) \
- x(inode_checksum_type_invalid, 199) \
- x(inode_compression_type_invalid, 200) \
- x(inode_subvol_root_but_not_dir, 201) \
- x(inode_i_size_dirty_but_clean, 202) \
- x(inode_i_sectors_dirty_but_clean, 203) \
- x(inode_i_sectors_wrong, 204) \
- x(inode_dir_wrong_nlink, 205) \
- x(inode_dir_multiple_links, 206) \
- x(inode_multiple_links_but_nlink_0, 207) \
- x(inode_wrong_backpointer, 208) \
- x(inode_wrong_nlink, 209) \
- x(inode_unreachable, 210) \
- x(deleted_inode_but_clean, 211) \
- x(deleted_inode_missing, 212) \
- x(deleted_inode_is_dir, 213) \
- x(deleted_inode_not_unlinked, 214) \
- x(extent_overlapping, 215) \
- x(extent_in_missing_inode, 216) \
- x(extent_in_non_reg_inode, 217) \
- x(extent_past_end_of_inode, 218) \
- x(dirent_empty_name, 219) \
- x(dirent_val_too_big, 220) \
- x(dirent_name_too_long, 221) \
- x(dirent_name_embedded_nul, 222) \
- x(dirent_name_dot_or_dotdot, 223) \
- x(dirent_name_has_slash, 224) \
- x(dirent_d_type_wrong, 225) \
- x(inode_bi_parent_wrong, 226) \
- x(dirent_in_missing_dir_inode, 227) \
- x(dirent_in_non_dir_inode, 228) \
- x(dirent_to_missing_inode, 229) \
- x(dirent_to_missing_subvol, 230) \
- x(dirent_to_itself, 231) \
- x(quota_type_invalid, 232) \
- x(xattr_val_size_too_small, 233) \
- x(xattr_val_size_too_big, 234) \
- x(xattr_invalid_type, 235) \
- x(xattr_name_invalid_chars, 236) \
- x(xattr_in_missing_inode, 237) \
- x(root_subvol_missing, 238) \
- x(root_dir_missing, 239) \
- x(root_inode_not_dir, 240) \
- x(dir_loop, 241) \
- x(hash_table_key_duplicate, 242) \
- x(hash_table_key_wrong_offset, 243) \
- x(unlinked_inode_not_on_deleted_list, 244) \
- x(reflink_p_front_pad_bad, 245) \
- x(journal_entry_dup_same_device, 246) \
- x(inode_bi_subvol_missing, 247) \
- x(inode_bi_subvol_wrong, 248) \
- x(inode_points_to_missing_dirent, 249) \
- x(inode_points_to_wrong_dirent, 250) \
- x(inode_bi_parent_nonzero, 251) \
- x(dirent_to_missing_parent_subvol, 252) \
- x(dirent_not_visible_in_parent_subvol, 253) \
- x(subvol_fs_path_parent_wrong, 254) \
- x(subvol_root_fs_path_parent_nonzero, 255) \
- x(subvol_children_not_set, 256) \
- x(subvol_children_bad, 257) \
- x(subvol_loop, 258) \
- x(subvol_unreachable, 259) \
- x(btree_node_bkey_bad_u64s, 260) \
- x(btree_node_topology_empty_interior_node, 261) \
- x(btree_ptr_v2_min_key_bad, 262) \
- x(btree_root_unreadable_and_scan_found_nothing, 263) \
- x(snapshot_node_missing, 264) \
- x(dup_backpointer_to_bad_csum_extent, 265) \
- x(btree_bitmap_not_marked, 266) \
- x(sb_clean_entry_overrun, 267) \
- x(btree_ptr_v2_written_0, 268) \
- x(subvol_snapshot_bad, 269) \
- x(subvol_inode_bad, 270)
+enum bch_fsck_flags {
+ FSCK_CAN_FIX = 1 << 0,
+ FSCK_CAN_IGNORE = 1 << 1,
+ FSCK_NEED_FSCK = 1 << 2,
+ FSCK_NO_RATELIMIT = 1 << 3,
+ FSCK_AUTOFIX = 1 << 4,
+};
+
+#define BCH_SB_ERRS() \
+ x(clean_but_journal_not_empty, 0, 0) \
+ x(dirty_but_no_journal_entries, 1, 0) \
+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2, 0) \
+ x(sb_clean_journal_seq_mismatch, 3, 0) \
+ x(sb_clean_btree_root_mismatch, 4, 0) \
+ x(sb_clean_missing, 5, 0) \
+ x(jset_unsupported_version, 6, 0) \
+ x(jset_unknown_csum, 7, 0) \
+ x(jset_last_seq_newer_than_seq, 8, 0) \
+ x(jset_past_bucket_end, 9, 0) \
+ x(jset_seq_blacklisted, 10, 0) \
+ x(journal_entries_missing, 11, 0) \
+ x(journal_entry_replicas_not_marked, 12, 0) \
+ x(journal_entry_past_jset_end, 13, 0) \
+ x(journal_entry_replicas_data_mismatch, 14, 0) \
+ x(journal_entry_bkey_u64s_0, 15, 0) \
+ x(journal_entry_bkey_past_end, 16, 0) \
+ x(journal_entry_bkey_bad_format, 17, 0) \
+ x(journal_entry_bkey_invalid, 18, 0) \
+ x(journal_entry_btree_root_bad_size, 19, 0) \
+ x(journal_entry_blacklist_bad_size, 20, 0) \
+ x(journal_entry_blacklist_v2_bad_size, 21, 0) \
+ x(journal_entry_blacklist_v2_start_past_end, 22, 0) \
+ x(journal_entry_usage_bad_size, 23, 0) \
+ x(journal_entry_data_usage_bad_size, 24, 0) \
+ x(journal_entry_clock_bad_size, 25, 0) \
+ x(journal_entry_clock_bad_rw, 26, 0) \
+ x(journal_entry_dev_usage_bad_size, 27, 0) \
+ x(journal_entry_dev_usage_bad_dev, 28, 0) \
+ x(journal_entry_dev_usage_bad_pad, 29, 0) \
+ x(btree_node_unreadable, 30, 0) \
+ x(btree_node_fault_injected, 31, 0) \
+ x(btree_node_bad_magic, 32, 0) \
+ x(btree_node_bad_seq, 33, 0) \
+ x(btree_node_unsupported_version, 34, 0) \
+ x(btree_node_bset_older_than_sb_min, 35, 0) \
+ x(btree_node_bset_newer_than_sb, 36, 0) \
+ x(btree_node_data_missing, 37, 0) \
+ x(btree_node_bset_after_end, 38, 0) \
+ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \
+ x(btree_node_replicas_data_mismatch, 40, 0) \
+ x(bset_unknown_csum, 41, 0) \
+ x(bset_bad_csum, 42, 0) \
+ x(bset_past_end_of_btree_node, 43, 0) \
+ x(bset_wrong_sector_offset, 44, 0) \
+ x(bset_empty, 45, 0) \
+ x(bset_bad_seq, 46, 0) \
+ x(bset_blacklisted_journal_seq, 47, 0) \
+ x(first_bset_blacklisted_journal_seq, 48, 0) \
+ x(btree_node_bad_btree, 49, 0) \
+ x(btree_node_bad_level, 50, 0) \
+ x(btree_node_bad_min_key, 51, 0) \
+ x(btree_node_bad_max_key, 52, 0) \
+ x(btree_node_bad_format, 53, 0) \
+ x(btree_node_bkey_past_bset_end, 54, 0) \
+ x(btree_node_bkey_bad_format, 55, 0) \
+ x(btree_node_bad_bkey, 56, 0) \
+ x(btree_node_bkey_out_of_order, 57, 0) \
+ x(btree_root_bkey_invalid, 58, 0) \
+ x(btree_root_read_error, 59, 0) \
+ x(btree_root_bad_min_key, 60, 0) \
+ x(btree_root_bad_max_key, 61, 0) \
+ x(btree_node_read_error, 62, 0) \
+ x(btree_node_topology_bad_min_key, 63, 0) \
+ x(btree_node_topology_bad_max_key, 64, 0) \
+ x(btree_node_topology_overwritten_by_prev_node, 65, 0) \
+ x(btree_node_topology_overwritten_by_next_node, 66, 0) \
+ x(btree_node_topology_interior_node_empty, 67, 0) \
+ x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \
+ x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \
+ x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \
+ x(fs_usage_cached_wrong, 71, FSCK_AUTOFIX) \
+ x(fs_usage_reserved_wrong, 72, FSCK_AUTOFIX) \
+ x(fs_usage_persistent_reserved_wrong, 73, FSCK_AUTOFIX) \
+ x(fs_usage_nr_inodes_wrong, 74, FSCK_AUTOFIX) \
+ x(fs_usage_replicas_wrong, 75, FSCK_AUTOFIX) \
+ x(dev_usage_buckets_wrong, 76, FSCK_AUTOFIX) \
+ x(dev_usage_sectors_wrong, 77, FSCK_AUTOFIX) \
+ x(dev_usage_fragmented_wrong, 78, FSCK_AUTOFIX) \
+ x(dev_usage_buckets_ec_wrong, 79, FSCK_AUTOFIX) \
+ x(bkey_version_in_future, 80, 0) \
+ x(bkey_u64s_too_small, 81, 0) \
+ x(bkey_invalid_type_for_btree, 82, 0) \
+ x(bkey_extent_size_zero, 83, 0) \
+ x(bkey_extent_size_greater_than_offset, 84, 0) \
+ x(bkey_size_nonzero, 85, 0) \
+ x(bkey_snapshot_nonzero, 86, 0) \
+ x(bkey_snapshot_zero, 87, 0) \
+ x(bkey_at_pos_max, 88, 0) \
+ x(bkey_before_start_of_btree_node, 89, 0) \
+ x(bkey_after_end_of_btree_node, 90, 0) \
+ x(bkey_val_size_nonzero, 91, 0) \
+ x(bkey_val_size_too_small, 92, 0) \
+ x(alloc_v1_val_size_bad, 93, 0) \
+ x(alloc_v2_unpack_error, 94, 0) \
+ x(alloc_v3_unpack_error, 95, 0) \
+ x(alloc_v4_val_size_bad, 96, 0) \
+ x(alloc_v4_backpointers_start_bad, 97, 0) \
+ x(alloc_key_data_type_bad, 98, 0) \
+ x(alloc_key_empty_but_have_data, 99, 0) \
+ x(alloc_key_dirty_sectors_0, 100, 0) \
+ x(alloc_key_data_type_inconsistency, 101, 0) \
+ x(alloc_key_to_missing_dev_bucket, 102, 0) \
+ x(alloc_key_cached_inconsistency, 103, 0) \
+ x(alloc_key_cached_but_read_time_zero, 104, 0) \
+ x(alloc_key_to_missing_lru_entry, 105, 0) \
+ x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \
+ x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \
+ x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \
+ x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \
+ x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \
+ x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \
+ x(bucket_sector_count_overflow, 112, 0) \
+ x(bucket_metadata_type_mismatch, 113, 0) \
+ x(need_discard_key_wrong, 114, 0) \
+ x(freespace_key_wrong, 115, 0) \
+ x(freespace_hole_missing, 116, 0) \
+ x(bucket_gens_val_size_bad, 117, 0) \
+ x(bucket_gens_key_wrong, 118, 0) \
+ x(bucket_gens_hole_wrong, 119, 0) \
+ x(bucket_gens_to_invalid_dev, 120, 0) \
+ x(bucket_gens_to_invalid_buckets, 121, 0) \
+ x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \
+ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
+ x(need_discard_freespace_key_bad, 124, 0) \
+ x(backpointer_bucket_offset_wrong, 125, 0) \
+ x(backpointer_to_missing_device, 126, 0) \
+ x(backpointer_to_missing_alloc, 127, 0) \
+ x(backpointer_to_missing_ptr, 128, 0) \
+ x(lru_entry_at_time_0, 129, 0) \
+ x(lru_entry_to_invalid_bucket, 130, 0) \
+ x(lru_entry_bad, 131, 0) \
+ x(btree_ptr_val_too_big, 132, 0) \
+ x(btree_ptr_v2_val_too_big, 133, 0) \
+ x(btree_ptr_has_non_ptr, 134, 0) \
+ x(extent_ptrs_invalid_entry, 135, 0) \
+ x(extent_ptrs_no_ptrs, 136, 0) \
+ x(extent_ptrs_too_many_ptrs, 137, 0) \
+ x(extent_ptrs_redundant_crc, 138, 0) \
+ x(extent_ptrs_redundant_stripe, 139, 0) \
+ x(extent_ptrs_unwritten, 140, 0) \
+ x(extent_ptrs_written_and_unwritten, 141, 0) \
+ x(ptr_to_invalid_device, 142, 0) \
+ x(ptr_to_duplicate_device, 143, 0) \
+ x(ptr_after_last_bucket, 144, 0) \
+ x(ptr_before_first_bucket, 145, 0) \
+ x(ptr_spans_multiple_buckets, 146, 0) \
+ x(ptr_to_missing_backpointer, 147, 0) \
+ x(ptr_to_missing_alloc_key, 148, 0) \
+ x(ptr_to_missing_replicas_entry, 149, 0) \
+ x(ptr_to_missing_stripe, 150, 0) \
+ x(ptr_to_incorrect_stripe, 151, 0) \
+ x(ptr_gen_newer_than_bucket_gen, 152, 0) \
+ x(ptr_too_stale, 153, 0) \
+ x(stale_dirty_ptr, 154, 0) \
+ x(ptr_bucket_data_type_mismatch, 155, 0) \
+ x(ptr_cached_and_erasure_coded, 156, 0) \
+ x(ptr_crc_uncompressed_size_too_small, 157, 0) \
+ x(ptr_crc_csum_type_unknown, 158, 0) \
+ x(ptr_crc_compression_type_unknown, 159, 0) \
+ x(ptr_crc_redundant, 160, 0) \
+ x(ptr_crc_uncompressed_size_too_big, 161, 0) \
+ x(ptr_crc_nonce_mismatch, 162, 0) \
+ x(ptr_stripe_redundant, 163, 0) \
+ x(reservation_key_nr_replicas_invalid, 164, 0) \
+ x(reflink_v_refcount_wrong, 165, 0) \
+ x(reflink_p_to_missing_reflink_v, 166, 0) \
+ x(stripe_pos_bad, 167, 0) \
+ x(stripe_val_size_bad, 168, 0) \
+ x(stripe_sector_count_wrong, 169, 0) \
+ x(snapshot_tree_pos_bad, 170, 0) \
+ x(snapshot_tree_to_missing_snapshot, 171, 0) \
+ x(snapshot_tree_to_missing_subvol, 172, 0) \
+ x(snapshot_tree_to_wrong_subvol, 173, 0) \
+ x(snapshot_tree_to_snapshot_subvol, 174, 0) \
+ x(snapshot_pos_bad, 175, 0) \
+ x(snapshot_parent_bad, 176, 0) \
+ x(snapshot_children_not_normalized, 177, 0) \
+ x(snapshot_child_duplicate, 178, 0) \
+ x(snapshot_child_bad, 179, 0) \
+ x(snapshot_skiplist_not_normalized, 180, 0) \
+ x(snapshot_skiplist_bad, 181, 0) \
+ x(snapshot_should_not_have_subvol, 182, 0) \
+ x(snapshot_to_bad_snapshot_tree, 183, 0) \
+ x(snapshot_bad_depth, 184, 0) \
+ x(snapshot_bad_skiplist, 185, 0) \
+ x(subvol_pos_bad, 186, 0) \
+ x(subvol_not_master_and_not_snapshot, 187, 0) \
+ x(subvol_to_missing_root, 188, 0) \
+ x(subvol_root_wrong_bi_subvol, 189, 0) \
+ x(bkey_in_missing_snapshot, 190, 0) \
+ x(inode_pos_inode_nonzero, 191, 0) \
+ x(inode_pos_blockdev_range, 192, 0) \
+ x(inode_unpack_error, 193, 0) \
+ x(inode_str_hash_invalid, 194, 0) \
+ x(inode_v3_fields_start_bad, 195, 0) \
+ x(inode_snapshot_mismatch, 196, 0) \
+ x(inode_unlinked_but_clean, 197, 0) \
+ x(inode_unlinked_but_nlink_nonzero, 198, 0) \
+ x(inode_checksum_type_invalid, 199, 0) \
+ x(inode_compression_type_invalid, 200, 0) \
+ x(inode_subvol_root_but_not_dir, 201, 0) \
+ x(inode_i_size_dirty_but_clean, 202, 0) \
+ x(inode_i_sectors_dirty_but_clean, 203, 0) \
+ x(inode_i_sectors_wrong, 204, 0) \
+ x(inode_dir_wrong_nlink, 205, 0) \
+ x(inode_dir_multiple_links, 206, 0) \
+ x(inode_multiple_links_but_nlink_0, 207, 0) \
+ x(inode_wrong_backpointer, 208, 0) \
+ x(inode_wrong_nlink, 209, 0) \
+ x(inode_unreachable, 210, 0) \
+ x(deleted_inode_but_clean, 211, 0) \
+ x(deleted_inode_missing, 212, 0) \
+ x(deleted_inode_is_dir, 213, 0) \
+ x(deleted_inode_not_unlinked, 214, 0) \
+ x(extent_overlapping, 215, 0) \
+ x(extent_in_missing_inode, 216, 0) \
+ x(extent_in_non_reg_inode, 217, 0) \
+ x(extent_past_end_of_inode, 218, 0) \
+ x(dirent_empty_name, 219, 0) \
+ x(dirent_val_too_big, 220, 0) \
+ x(dirent_name_too_long, 221, 0) \
+ x(dirent_name_embedded_nul, 222, 0) \
+ x(dirent_name_dot_or_dotdot, 223, 0) \
+ x(dirent_name_has_slash, 224, 0) \
+ x(dirent_d_type_wrong, 225, 0) \
+ x(inode_bi_parent_wrong, 226, 0) \
+ x(dirent_in_missing_dir_inode, 227, 0) \
+ x(dirent_in_non_dir_inode, 228, 0) \
+ x(dirent_to_missing_inode, 229, 0) \
+ x(dirent_to_missing_subvol, 230, 0) \
+ x(dirent_to_itself, 231, 0) \
+ x(quota_type_invalid, 232, 0) \
+ x(xattr_val_size_too_small, 233, 0) \
+ x(xattr_val_size_too_big, 234, 0) \
+ x(xattr_invalid_type, 235, 0) \
+ x(xattr_name_invalid_chars, 236, 0) \
+ x(xattr_in_missing_inode, 237, 0) \
+ x(root_subvol_missing, 238, 0) \
+ x(root_dir_missing, 239, 0) \
+ x(root_inode_not_dir, 240, 0) \
+ x(dir_loop, 241, 0) \
+ x(hash_table_key_duplicate, 242, 0) \
+ x(hash_table_key_wrong_offset, 243, 0) \
+ x(unlinked_inode_not_on_deleted_list, 244, 0) \
+ x(reflink_p_front_pad_bad, 245, 0) \
+ x(journal_entry_dup_same_device, 246, 0) \
+ x(inode_bi_subvol_missing, 247, 0) \
+ x(inode_bi_subvol_wrong, 248, 0) \
+ x(inode_points_to_missing_dirent, 249, 0) \
+ x(inode_points_to_wrong_dirent, 250, 0) \
+ x(inode_bi_parent_nonzero, 251, 0) \
+ x(dirent_to_missing_parent_subvol, 252, 0) \
+ x(dirent_not_visible_in_parent_subvol, 253, 0) \
+ x(subvol_fs_path_parent_wrong, 254, 0) \
+ x(subvol_root_fs_path_parent_nonzero, 255, 0) \
+ x(subvol_children_not_set, 256, 0) \
+ x(subvol_children_bad, 257, 0) \
+ x(subvol_loop, 258, 0) \
+ x(subvol_unreachable, 259, 0) \
+ x(btree_node_bkey_bad_u64s, 260, 0) \
+ x(btree_node_topology_empty_interior_node, 261, 0) \
+ x(btree_ptr_v2_min_key_bad, 262, 0) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
+ x(snapshot_node_missing, 264, 0) \
+ x(dup_backpointer_to_bad_csum_extent, 265, 0) \
+ x(btree_bitmap_not_marked, 266, 0) \
+ x(sb_clean_entry_overrun, 267, 0) \
+ x(btree_ptr_v2_written_0, 268, 0) \
+ x(subvol_snapshot_bad, 269, 0) \
+ x(subvol_inode_bad, 270, 0) \
+ x(alloc_key_stripe_sectors_wrong, 271, 0) \
+ x(accounting_mismatch, 272, 0) \
+ x(accounting_replicas_not_marked, 273, 0) \
+ x(invalid_btree_id, 274, 0) \
+ x(alloc_key_io_time_bad, 275, 0) \
+ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
enum bch_sb_error_id {
-#define x(t, n) BCH_FSCK_ERR_##t = n,
+#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
BCH_SB_ERR_MAX
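The third column added to each x() entry above is there for other expansions of BCH_SB_ERRS() to consume. As a hypothetical illustration only (not part of this patch), a per-error flags table could be generated from the same list:

	/* Hypothetical consumer, for illustration only: expand the third
	 * column into a lookup table indexed by error id (assumes
	 * FSCK_AUTOFIX is a flag bit defined elsewhere in this series). */
	#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
	static const unsigned bch_sb_error_fsck_flags[] = {
		BCH_SB_ERRS()
	};
	#undef x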
diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h
index c1860d8163fb..c4b3d8d3f414 100644
--- a/fs/bcachefs/seqmutex.h
+++ b/fs/bcachefs/seqmutex.h
@@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock)
static inline void seqmutex_lock(struct seqmutex *lock)
{
mutex_lock(&lock->lock);
-}
-
-static inline void seqmutex_unlock(struct seqmutex *lock)
-{
lock->seq++;
- mutex_unlock(&lock->lock);
}
-static inline u32 seqmutex_seq(struct seqmutex *lock)
+static inline u32 seqmutex_unlock(struct seqmutex *lock)
{
- return lock->seq;
+ u32 seq = lock->seq;
+ mutex_unlock(&lock->lock);
+ return seq;
}
static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
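The seqmutex rework above moves the sequence increment to lock time and has seqmutex_unlock() return the sequence observed while the mutex is still held. A sketch of the assumed caller pattern (not shown in this patch):

	/* Sketch, assuming a typical caller: sample the sequence at unlock,
	 * do blocking work without the mutex, then resume iteration only if
	 * nobody has retaken the lock in the meantime. */
	seqmutex_lock(&lock);
	/* ... locate an element on the protected list ... */
	u32 seq = seqmutex_unlock(&lock);
	/* ... blocking work that must not hold the mutex ... */
	if (!seqmutex_relock(&lock, seq)) {
		/* the list may have changed; restart the scan */
	}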
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 51918acfd726..24023d6a9698 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
+ if (unlikely(new_bytes > INT_MAX))
+ return NULL;
+
new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
@@ -1565,13 +1568,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
return 0;
- if (!test_bit(BCH_FS_started, &c->flags)) {
- ret = bch2_fs_read_write_early(c);
- bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
- if (ret)
- return ret;
- }
-
trans = bch2_trans_get(c);
/*
@@ -1687,6 +1683,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+ set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
+
bch2_delete_dead_snapshots(c);
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
}
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index cbad9b27874f..c8c266cb5797 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -300,7 +300,7 @@ not_found:
if (!found && (flags & STR_HASH_must_replace)) {
ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
} else if (found && (flags & STR_HASH_must_create)) {
- ret = -EEXIST;
+ ret = -BCH_ERR_EEXIST_str_hash_set;
} else {
if (!found && slot.path)
swap(iter, slot);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 055478d21e9e..b156fc85b8a3 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -649,9 +649,10 @@ reread:
bytes = vstruct_bytes(sb->sb);
- if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) {
- prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
- bytes, 512UL << sb->sb->layout.sb_max_size_bits);
+ u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits);
+ if (bytes > sb_size) {
+ prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)",
+ bytes, sb_size);
return -BCH_ERR_invalid_sb_too_big;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 65e239d32915..da735608d47c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -536,7 +536,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
- bch2_fs_allocator_background_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@@ -564,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
- EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
- free_percpu(c->online_reserved);
+ if (c->online_reserved) {
+ u64 v = percpu_u64_get(c->online_reserved);
+ WARN(v, "online_reserved not 0 at shutdown: %lli", v);
+ free_percpu(c->online_reserved);
+ }
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
@@ -912,9 +914,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_io_clock_init(&c->io_clock[WRITE]) ?:
bch2_fs_journal_init(&c->journal) ?:
bch2_fs_replicas_init(c) ?:
+ bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
- bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
@@ -931,12 +933,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
- for (i = 0; i < c->sb.nr_devices; i++)
- if (bch2_member_exists(c->disk_sb.sb, i) &&
- bch2_dev_alloc(c, i)) {
- ret = -EEXIST;
+ for (i = 0; i < c->sb.nr_devices; i++) {
+ if (!bch2_member_exists(c->disk_sb.sb, i))
+ continue;
+ ret = bch2_dev_alloc(c, i);
+ if (ret)
goto err;
- }
+ }
bch2_journal_entry_res_resize(&c->journal,
&c->btree_root_journal_res,
@@ -1194,6 +1197,7 @@ static void bch2_dev_free(struct bch_dev *ca)
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
+ bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
free_percpu(ca->io_done);
@@ -1316,6 +1320,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
atomic_long_set(&ca->ref, 1);
#endif
+ bch2_dev_allocator_background_init(ca);
+
if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@@ -1528,6 +1534,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
* The allocator thread itself allocates btree nodes, so stop it first:
*/
bch2_dev_allocator_remove(c, ca);
+ bch2_recalc_capacity(c);
bch2_dev_journal_stop(&c->journal, ca);
}
@@ -1539,6 +1546,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ bch2_dev_do_discards(ca);
}
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
@@ -1764,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, true);
bch_err_msg(c, ret, "allocating journal");
if (ret)
goto err;
@@ -1924,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
if (!ca->journal.nr) {
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, false);
bch_err_msg(ca, ret, "allocating journal");
if (ret)
goto err;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index de331dec2a99..4ec7e44d6e36 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -252,8 +252,10 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}
-void bch2_print_string_as_lines(const char *prefix, const char *lines)
+static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
+ bool nonblocking)
{
+ bool locked = false;
const char *p;
if (!lines) {
@@ -261,7 +263,13 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
return;
}
- console_lock();
+ if (!nonblocking) {
+ console_lock();
+ locked = true;
+ } else {
+ locked = console_trylock();
+ }
+
while (1) {
p = strchrnul(lines, '\n');
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
@@ -269,7 +277,18 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
break;
lines = p + 1;
}
- console_unlock();
+ if (locked)
+ console_unlock();
+}
+
+void bch2_print_string_as_lines(const char *prefix, const char *lines)
+{
+ return __bch2_print_string_as_lines(prefix, lines, false);
+}
+
+void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines)
+{
+ return __bch2_print_string_as_lines(prefix, lines, true);
}
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr,
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 5d2c470a49ac..5b0533ec4c7e 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -315,6 +315,7 @@ void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2(struct printbuf *, u64);
void bch2_print_string_as_lines(const char *prefix, const char *lines);
+void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines);
typedef DARRAY(unsigned long) bch_stacktrace;
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index d76f406d3b2e..f92f108840f5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -475,6 +475,7 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
char *link = folio_address(folio);
+ int err = -EIO;
if (len == 0 || len > PAGE_SIZE) {
befs_error(sb, "Long symlink with illegal length");
@@ -487,13 +488,10 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
goto fail;
}
link[len - 1] = '\0';
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
+ err = 0;
fail:
- folio_set_error(folio);
- folio_unlock(folio);
- return -EIO;
+ folio_end_read(folio, err == 0);
+ return err;
}
/*
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a43897b03ce9..6fdec541f8bf 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1216,7 +1216,6 @@ out_free_interp:
}
reloc_func_desc = interp_load_addr;
- allow_write_access(interpreter);
fput(interpreter);
kfree(interp_elf_ex);
@@ -1308,7 +1307,6 @@ out_free_dentry:
kfree(interp_elf_ex);
kfree(interp_elf_phdata);
out_free_file:
- allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_ph:
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index b799701454a9..28a3439f163a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -394,7 +394,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
goto error;
}
- allow_write_access(interpreter);
fput(interpreter);
interpreter = NULL;
}
@@ -466,10 +465,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
retval = 0;
error:
- if (interpreter) {
- allow_write_access(interpreter);
+ if (interpreter)
fput(interpreter);
- }
kfree(interpreter_name);
kfree(exec_params.phdrs);
kfree(exec_params.loadmap);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 68fa225f89e5..31660d8cc2c6 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -247,13 +247,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
if (retval < 0)
goto ret;
- if (fmt->flags & MISC_FMT_OPEN_FILE) {
+ if (fmt->flags & MISC_FMT_OPEN_FILE)
interp_file = file_clone_open(fmt->interp_file);
- if (!IS_ERR(interp_file))
- deny_write_access(interp_file);
- } else {
+ else
interp_file = open_exec(fmt->interpreter);
- }
retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file))
goto ret;
@@ -1086,4 +1083,5 @@ static void __exit exit_misc_binfmt(void)
core_initcall(init_misc_binfmt);
module_exit(exit_misc_binfmt);
+MODULE_DESCRIPTION("Kernel support for miscellaneous binaries");
MODULE_LICENSE("GPL");
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1b6625e95958..637daf6e4d45 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -155,4 +155,5 @@ static void __exit exit_script_binfmt(void)
core_initcall(init_script_binfmt);
module_exit(exit_script_binfmt);
+MODULE_DESCRIPTION("Kernel support for scripts starting with #!");
MODULE_LICENSE("GPL");
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 477f350a8bd0..e3a57196b0ee 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -741,7 +741,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
goto fail_put_bio;
- } else if (use_append) {
+ } else if (use_append ||
+ (btrfs_is_zoned(fs_info) && inode &&
+ inode->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
goto fail_put_bio;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 1e09aeea69c2..60066822b532 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1785,6 +1785,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
struct btrfs_block_group *bg;
struct btrfs_space_info *space_info;
+ LIST_HEAD(retry_list);
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
@@ -1921,8 +1922,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
next:
- if (ret)
- btrfs_mark_bg_to_reclaim(bg);
+ if (ret) {
+ /* Refcount held by the reclaim_bgs list after splice. */
+ spin_lock(&fs_info->unused_bgs_lock);
+ /*
+			 * This block group might have been added to the unused
+			 * list during the above process. If it was not, move
+			 * it back to the reclaim list.
+ */
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+ list_add_tail(&bg->bg_list, &retry_list);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+ }
btrfs_put_block_group(bg);
mutex_unlock(&fs_info->reclaim_bgs_lock);
@@ -1942,6 +1955,9 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
end:
+ spin_lock(&fs_info->unused_bgs_lock);
+ list_splice_tail(&retry_list, &fs_info->reclaim_bgs);
+ spin_unlock(&fs_info->unused_bgs_lock);
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 38cdb8875e8e..cabb558dbdaa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2856,6 +2856,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (ret)
return ret;
+ spin_lock_init(&fs_info->extent_map_shrinker_lock);
+
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f688fab55251..958155cc43a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3553,7 +3553,7 @@ err:
for (int i = 0; i < num_folios; i++) {
if (eb->folios[i]) {
detach_extent_buffer_folio(eb, eb->folios[i]);
- __folio_put(eb->folios[i]);
+ folio_put(eb->folios[i]);
}
}
__free_extent_buffer(eb);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 744e8952abb0..b4c9a6aa118c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1028,7 +1028,14 @@ out_free_pre:
return ret;
}
-static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan)
+struct btrfs_em_shrink_ctx {
+ long nr_to_scan;
+ long scanned;
+ u64 last_ino;
+ u64 last_root;
+};
+
+static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
{
const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
struct extent_map_tree *tree = &inode->extent_tree;
@@ -1057,14 +1064,25 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
if (!down_read_trylock(&inode->i_mmap_lock))
return 0;
- write_lock(&tree->lock);
+ /*
+ * We want to be fast because we can be called from any path trying to
+ * allocate memory, so if the lock is busy we don't want to spend time
+ * waiting for it - either some task is about to do IO for the inode or
+	 * another task may already be shrinking extent maps here in this code, so
+ * skip this inode.
+ */
+ if (!write_trylock(&tree->lock)) {
+ up_read(&inode->i_mmap_lock);
+ return 0;
+ }
+
node = rb_first_cached(&tree->map);
while (node) {
struct extent_map *em;
em = rb_entry(node, struct extent_map, rb_node);
node = rb_next(node);
- (*scanned)++;
+ ctx->scanned++;
if (em->flags & EXTENT_FLAG_PINNED)
goto next;
@@ -1085,16 +1103,18 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
free_extent_map(em);
nr_dropped++;
next:
- if (*scanned >= nr_to_scan)
+ if (ctx->scanned >= ctx->nr_to_scan)
break;
/*
- * Restart if we had to reschedule, and any extent maps that were
- * pinned before may have become unpinned after we released the
- * lock and took it again.
+ * Stop if we need to reschedule or there's contention on the
+ * lock. This is to avoid slowing other tasks trying to take the
+ * lock and because the shrinker might be called during a memory
+ * allocation path and we want to avoid taking a very long time
+ * and slowing down all sorts of tasks.
*/
- if (cond_resched_rwlock_write(&tree->lock))
- node = rb_first_cached(&tree->map);
+ if (need_resched() || rwlock_needbreak(&tree->lock))
+ break;
}
write_unlock(&tree->lock);
up_read(&inode->i_mmap_lock);
@@ -1102,25 +1122,30 @@ next:
return nr_dropped;
}
-static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan)
+static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_inode *inode;
long nr_dropped = 0;
- u64 min_ino = fs_info->extent_map_shrinker_last_ino + 1;
+ u64 min_ino = ctx->last_ino + 1;
inode = btrfs_find_first_inode(root, min_ino);
while (inode) {
- nr_dropped += btrfs_scan_inode(inode, scanned, nr_to_scan);
+ nr_dropped += btrfs_scan_inode(inode, ctx);
min_ino = btrfs_ino(inode) + 1;
- fs_info->extent_map_shrinker_last_ino = btrfs_ino(inode);
- iput(&inode->vfs_inode);
+ ctx->last_ino = btrfs_ino(inode);
+ btrfs_add_delayed_iput(inode);
- if (*scanned >= nr_to_scan)
+ if (ctx->scanned >= ctx->nr_to_scan)
+ break;
+
+ /*
+ * We may be called from memory allocation paths, so we don't
+	 * want to take too much time and slow down tasks.
+ */
+ if (need_resched())
break;
- cond_resched();
inode = btrfs_find_first_inode(root, min_ino);
}
@@ -1132,14 +1157,14 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s
* inode if there is one or we will find out this was the last
* one and move to the next root.
*/
- fs_info->extent_map_shrinker_last_root = btrfs_root_id(root);
+ ctx->last_root = btrfs_root_id(root);
} else {
/*
* No more inodes in this root, set extent_map_shrinker_last_ino to 0 so
* that when processing the next root we start from its first inode.
*/
- fs_info->extent_map_shrinker_last_ino = 0;
- fs_info->extent_map_shrinker_last_root = btrfs_root_id(root) + 1;
+ ctx->last_ino = 0;
+ ctx->last_root = btrfs_root_id(root) + 1;
}
return nr_dropped;
@@ -1147,19 +1172,41 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s
long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
{
- const u64 start_root_id = fs_info->extent_map_shrinker_last_root;
- u64 next_root_id = start_root_id;
+ struct btrfs_em_shrink_ctx ctx;
+ u64 start_root_id;
+ u64 next_root_id;
bool cycled = false;
long nr_dropped = 0;
- long scanned = 0;
+
+ ctx.scanned = 0;
+ ctx.nr_to_scan = nr_to_scan;
+
+ /*
+	 * If multiple tasks run this shrinker, make the next one start from
+	 * the next inode in case it starts before we finish.
+ */
+ spin_lock(&fs_info->extent_map_shrinker_lock);
+ ctx.last_ino = fs_info->extent_map_shrinker_last_ino;
+ fs_info->extent_map_shrinker_last_ino++;
+ ctx.last_root = fs_info->extent_map_shrinker_last_root;
+ spin_unlock(&fs_info->extent_map_shrinker_lock);
+
+ start_root_id = ctx.last_root;
+ next_root_id = ctx.last_root;
if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
- trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, nr);
+ trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
+ nr, ctx.last_root,
+ ctx.last_ino);
}
- while (scanned < nr_to_scan) {
+ /*
+ * We may be called from memory allocation paths, so we don't want to
+	 * take too much time and slow down tasks, so stop if we need to reschedule.
+ */
+ while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
struct btrfs_root *root;
unsigned long count;
@@ -1171,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
spin_unlock(&fs_info->fs_roots_radix_lock);
if (start_root_id > 0 && !cycled) {
next_root_id = 0;
- fs_info->extent_map_shrinker_last_root = 0;
- fs_info->extent_map_shrinker_last_ino = 0;
+ ctx.last_root = 0;
+ ctx.last_ino = 0;
cycled = true;
continue;
}
@@ -1186,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
continue;
if (is_fstree(btrfs_root_id(root)))
- nr_dropped += btrfs_scan_root(root, &scanned, nr_to_scan);
+ nr_dropped += btrfs_scan_root(root, &ctx);
btrfs_put_root(root);
}
+ /*
+ * In case of multiple tasks running this extent map shrinking code this
+ * isn't perfect but it's simple and silences things like KCSAN. It's
+ * not possible to know which task made more progress because we can
+ * cycle back to the first root and first inode if it's not the first
+ * time the shrinker ran, see the above logic. Also a task that started
+	 * later may finish earlier than another task and have made less
+	 * progress. So make this simple and update to the progress of the
+	 * last task that finished, with the occasional possibility of two
+	 * consecutive runs of the shrinker processing the same inodes.
+ */
+ spin_lock(&fs_info->extent_map_shrinker_lock);
+ fs_info->extent_map_shrinker_last_ino = ctx.last_ino;
+ fs_info->extent_map_shrinker_last_root = ctx.last_root;
+ spin_unlock(&fs_info->extent_map_shrinker_lock);
+
if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) {
s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
- trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr);
+ trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped,
+ nr, ctx.last_root,
+ ctx.last_ino);
}
return nr_dropped;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3ab8dea5036b..dabc3d0793cf 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2697,7 +2697,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
int bg_reclaim_threshold = 0;
- bool initial = (size == block_group->length);
+ bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
u64 reclaimable_unusable;
WARN_ON(!initial && offset + size > block_group->zone_capacity);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 89f0650631cd..833dc3fe0a38 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -630,6 +630,7 @@ struct btrfs_fs_info {
s32 delalloc_batch;
struct percpu_counter evictable_extent_maps;
+ spinlock_t extent_map_shrinker_lock;
u64 extent_map_shrinker_last_root;
u64 extent_map_shrinker_last_ino;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 753db965f7c0..d62c96f00ff8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5587,7 +5587,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
args.ino = ino;
args.root = root;
- inode = iget5_locked(s, hashval, btrfs_find_actor,
+ inode = iget5_locked_rcu(s, hashval, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
@@ -10385,7 +10385,7 @@ out_unlock:
out_folios:
for (i = 0; i < nr_folios; i++) {
if (folios[i])
- __folio_put(folios[i]);
+ folio_put(folios[i]);
}
kvfree(folios);
out:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index efd5d6e9589e..6ad524b894fc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4627,7 +4627,7 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
goto out_iov;
init_sync_kiocb(&kiocb, file);
- ret = kiocb_set_rw_flags(&kiocb, 0);
+ ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
if (ret)
goto out_iov;
kiocb.ki_pos = pos;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fc2a7ea26354..39a15cca58ca 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1351,7 +1351,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info)
int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *quota_root;
+ struct btrfs_root *quota_root = NULL;
struct btrfs_trans_handle *trans = NULL;
int ret = 0;
@@ -1449,9 +1449,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
quota_root->node, 0, 1);
- btrfs_put_root(quota_root);
out:
+ btrfs_put_root(quota_root);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (ret && trans)
btrfs_end_transaction(trans);
@@ -3062,8 +3062,6 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_inherit *inherit,
size_t size)
{
- if (!btrfs_qgroup_enabled(fs_info))
- return 0;
if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
return -EOPNOTSUPP;
if (size < sizeof(*inherit) || size > PAGE_SIZE)
@@ -3085,6 +3083,14 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
return -EINVAL;
/*
+ * Skip the inherit source qgroups check if qgroup is not enabled.
+	 * Qgroups can still be enabled later, causing problems, but in that case
+ * btrfs_qgroup_inherit() would just ignore those invalid ones.
+ */
+ if (!btrfs_qgroup_enabled(fs_info))
+ return 0;
+
+ /*
* Now check all the remaining qgroups, they should all:
*
* - Exist
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index cf531255ab76..9522a8b79d22 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -441,7 +441,8 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
u32 item_size = btrfs_item_size(leaf, slot);
unsigned long end, ptr;
u64 offset, flags, count;
- int type, ret;
+ int type;
+ int ret = 0;
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
@@ -486,7 +487,11 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
key->objectid, key->offset);
break;
case BTRFS_EXTENT_OWNER_REF_KEY:
- WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+ if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
+ btrfs_err(fs_info,
+ "found extent owner ref without simple quotas enabled");
+ ret = -EINVAL;
+ }
break;
default:
btrfs_err(fs_info, "invalid key type in iref");
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index afd6932f5e89..d7caa3732f07 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1688,20 +1688,24 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
(i << fs_info->sectorsize_bits);
int err;
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
-
io_stripe.is_scrub = true;
+ stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits;
+ /*
+ * For RST cases, we need to manually split the bbio to
+ * follow the RST boundary.
+ */
err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
- &stripe_len, &bioc, &io_stripe,
- &mirror);
+ &stripe_len, &bioc, &io_stripe, &mirror);
btrfs_put_bioc(bioc);
- if (err) {
- btrfs_bio_end_io(bbio,
- errno_to_blk_status(err));
- return;
+ if (err < 0) {
+ set_bit(i, &stripe->io_error_bitmap);
+ set_bit(i, &stripe->error_bitmap);
+ continue;
}
+
+ bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
+ fs_info, scrub_read_endio, stripe);
+ bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
}
__bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d620323d08ea..ae8c56442549 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -373,11 +373,18 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
* "optimal" chunk size based on the fs size. However when we actually
* allocate the chunk we will strip this down further, making it no more
* than 10% of the disk or 1G, whichever is smaller.
+ *
+	 * In zoned mode, we need to use the zone_size
+	 * (= data_sinfo->chunk_size) as-is.
*/
data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
- data_chunk_size = min(data_sinfo->chunk_size,
- mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
- data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+ if (!btrfs_is_zoned(fs_info)) {
+ data_chunk_size = min(data_sinfo->chunk_size,
+ mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
+ data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+ } else {
+ data_chunk_size = data_sinfo->chunk_size;
+ }
/*
* Since data allocations immediately use block groups as part of the
@@ -405,6 +412,17 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
avail >>= 3;
else
avail >>= 1;
+
+		 * In zoned mode, we always allocate one zone as one chunk.
+		 * Returning non-zone-size-aligned bytes here will result in
+ * Returning non-zone size alingned bytes here will result in
+ * less pressure for the async metadata reclaim process, and it
+ * will over-commit too much leading to ENOSPC. Align down to the
+ * zone size to avoid that.
+ */
+ if (btrfs_is_zoned(fs_info))
+ avail = ALIGN_DOWN(avail, fs_info->zone_size);
+
return avail;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 26a2e5aa08e9..0bce1d45e252 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid);
* and once to do all the other items.
*/
+static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
+{
+ unsigned int nofs_flag;
+ struct inode *inode;
+
+ /*
+ * We're holding a transaction handle whether we are logging or
+ * replaying a log tree, so we must make sure NOFS semantics apply
+ * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL
+ * to allocate an inode, which can recurse back into the filesystem and
+ * attempt a transaction commit, resulting in a deadlock.
+ */
+ nofs_flag = memalloc_nofs_save();
+ inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ memalloc_nofs_restore(nofs_flag);
+
+ return inode;
+}
+
/*
* start a sub transaction and setup the log tree
* this increments the log tree writer count to make the people
@@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
{
struct inode *inode;
- inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ inode = btrfs_iget_logging(objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -5438,7 +5457,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = start_inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
LIST_HEAD(dir_list);
struct btrfs_dir_list *dir_elem;
@@ -5499,7 +5517,7 @@ again:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
+ di_inode = btrfs_iget_logging(di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto out;
@@ -5559,7 +5577,7 @@ again:
btrfs_add_delayed_iput(curr_inode);
curr_inode = NULL;
- vfs_inode = btrfs_iget(fs_info->sb, ino, root);
+ vfs_inode = btrfs_iget_logging(ino, root);
if (IS_ERR(vfs_inode)) {
ret = PTR_ERR(vfs_inode);
break;
@@ -5654,7 +5672,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES)
return BTRFS_LOG_FORCE_COMMIT;
- inode = btrfs_iget(root->fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was deleted in
* the current transaction then we either:
@@ -5755,7 +5773,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
/*
@@ -5786,7 +5803,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
list_del(&curr->list);
kfree(curr);
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -5797,7 +5814,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (ret != -ENOENT)
break;
- inode = btrfs_iget(fs_info->sb, parent, root);
+ inode = btrfs_iget_logging(parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
break;
@@ -6319,7 +6336,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
const bool orig_log_new_dentries = ctx->log_new_dentries;
- struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_item *item;
int ret = 0;
@@ -6345,7 +6361,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
if (key.type == BTRFS_ROOT_ITEM_KEY)
continue;
- di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root);
+ di_inode = btrfs_iget_logging(key.objectid, inode->root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
break;
@@ -6729,7 +6745,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -6794,8 +6809,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
- root);
+ dir_inode = btrfs_iget_logging(inode_key.objectid, root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -6857,7 +6871,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
while (true) {
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
int slot;
struct btrfs_key search_key;
@@ -6872,7 +6885,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
diff --git a/fs/buffer.c b/fs/buffer.c
index 8c19e705b9c3..dbe8f411ce52 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -258,7 +258,6 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
} else {
clear_buffer_uptodate(bh);
buffer_io_error(bh, ", async page read");
- folio_set_error(folio);
}
/*
@@ -391,7 +390,6 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
buffer_io_error(bh, ", lost async page write");
mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
- folio_set_error(folio);
}
first = folio_buffers(folio);
@@ -1960,7 +1958,6 @@ recover:
clear_buffer_dirty(bh);
}
} while ((bh = bh->b_this_page) != head);
- folio_set_error(folio);
BUG_ON(folio_test_writeback(folio));
mapping_set_error(folio->mapping, err);
folio_start_writeback(folio);
@@ -2405,10 +2402,8 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
if (iblock < lblock) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, iblock, bh, 0);
- if (err) {
- folio_set_error(folio);
+ if (err)
page_error = true;
- }
}
if (!buffer_mapped(bh)) {
folio_zero_range(folio, i * blocksize,
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index f449f7340aad..9fb06dc16520 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/namei.h>
+#include <trace/events/fscache.h>
#include "internal.h"
/*
@@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache)
}
/*
- * Withdraw volumes.
+ * Withdraw fscache volumes.
+ */
+static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache)
+{
+ struct list_head *cur;
+ struct cachefiles_volume *volume;
+ struct fscache_volume *vcookie;
+
+ _enter("");
+retry:
+ spin_lock(&cache->object_list_lock);
+ list_for_each(cur, &cache->volumes) {
+ volume = list_entry(cur, struct cachefiles_volume, cache_link);
+
+ if (atomic_read(&volume->vcookie->n_accesses) == 0)
+ continue;
+
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ fscache_withdraw_volume(vcookie);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
+ goto retry;
+ }
+ }
+ spin_unlock(&cache->object_list_lock);
+
+ _leave("");
+}
+
+/*
+ * Withdraw cachefiles volumes.
*/
static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
{
_enter("");
for (;;) {
+ struct fscache_volume *vcookie = NULL;
struct cachefiles_volume *volume = NULL;
spin_lock(&cache->object_list_lock);
if (!list_empty(&cache->volumes)) {
volume = list_first_entry(&cache->volumes,
struct cachefiles_volume, cache_link);
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (!vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ cpu_relax();
+ continue;
+ }
list_del_init(&volume->cache_link);
}
spin_unlock(&cache->object_list_lock);
@@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
break;
cachefiles_withdraw_volume(volume);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
}
_leave("");
@@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
pr_info("File cache on %s unregistering\n", fscache->name);
fscache_withdraw_cache(fscache);
+ cachefiles_withdraw_fscache_volumes(cache);
/* we now have to destroy all the active objects pertaining to this
* cache - which we do by passing them off to thread pool to be
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 06cdf1a8a16f..89b11336a836 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
if (cachefiles_in_ondemand_mode(cache)) {
if (!xa_empty(&cache->reqs)) {
- rcu_read_lock();
+ xas_lock(&xas);
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
if (!cachefiles_ondemand_is_reopening_read(req)) {
mask |= EPOLLIN;
break;
}
}
- rcu_read_unlock();
+ xas_unlock(&xas);
}
} else {
if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 6845a90cdfcc..7b99bd98de75 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -48,6 +48,7 @@ enum cachefiles_object_state {
CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */
CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */
CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */
+ CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */
};
struct cachefiles_ondemand_info {
@@ -128,6 +129,7 @@ struct cachefiles_cache {
unsigned long req_id_next;
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
+ u32 msg_id_next;
};
static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
@@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
+CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING);
static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
{
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index bce005f2b456..470c96658385 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
*/
xas_lock(&xas);
- if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ if (test_bit(CACHEFILES_DEAD, &cache->flags) ||
+ cachefiles_ondemand_object_is_dropping(object)) {
xas_unlock(&xas);
ret = -EIO;
goto out;
@@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
smp_mb();
if (opcode == CACHEFILES_OP_CLOSE &&
- !cachefiles_ondemand_object_is_open(object)) {
+ !cachefiles_ondemand_object_is_open(object)) {
WARN_ON_ONCE(object->ondemand->ondemand_id == 0);
xas_unlock(&xas);
ret = -EIO;
goto out;
}
- xas.xa_index = 0;
+ /*
+ * Cyclically find a free xas to avoid msg_id reuse that would
+ * cause the daemon to successfully copen a stale msg_id.
+ */
+ xas.xa_index = cache->msg_id_next;
xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART) {
+ xas.xa_index = 0;
+ xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK);
+ }
if (xas.xa_node == XAS_RESTART)
xas_set_err(&xas, -EBUSY);
+
xas_store(&xas, req);
- xas_clear_mark(&xas, XA_FREE_MARK);
- xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ if (xas_valid(&xas)) {
+ cache->msg_id_next = xas.xa_index + 1;
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ }
xas_unlock(&xas);
} while (xas_nomem(&xas, GFP_KERNEL));
@@ -568,7 +581,8 @@ out:
* If error occurs after creating the anonymous fd,
* cachefiles_ondemand_fd_release() will set object to close.
*/
- if (opcode == CACHEFILES_OP_OPEN)
+ if (opcode == CACHEFILES_OP_OPEN &&
+ !cachefiles_ondemand_object_is_dropping(object))
cachefiles_ondemand_set_object_close(object);
kfree(req);
return ret;
@@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
{
+ unsigned long index;
+ struct cachefiles_req *req;
+ struct cachefiles_cache *cache;
+
+ if (!object->ondemand)
+ return;
+
cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
cachefiles_ondemand_init_close_req, NULL);
+
+ if (!object->ondemand->ondemand_id)
+ return;
+
+ /* Cancel all requests for the object that is being dropped. */
+ cache = object->volume->cache;
+ xa_lock(&cache->reqs);
+ cachefiles_ondemand_set_object_dropping(object);
+ xa_for_each(&cache->reqs, index, req) {
+ if (req->object == object) {
+ req->error = -EIO;
+ complete(&req->done);
+ __xa_erase(&cache->reqs, index);
+ }
+ }
+ xa_unlock(&cache->reqs);
+
+ /* Wait for ondemand_object_worker() to finish to avoid UAF. */
+ cancel_work_sync(&object->ondemand->ondemand_work);
}
int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
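The cyclic msg_id allocation above is open-coded with xas_find_marked() because the request store and the CACHEFILES_REQ_NEW mark must be set under the xa_lock that is already held. As a reference point only (a sketch, not usable verbatim in this code path), the stock xarray helper implements the same wrap-around policy:

	/* Reference sketch: xa_alloc_cyclic() hands out the lowest free index
	 * at or above *next and wraps to the lower limit when the search
	 * reaches the end, mirroring the open-coded loop above. */
	u32 id, next = 0;
	int err = xa_alloc_cyclic(&xa, &id, entry, xa_limit_32b, &next, GFP_KERNEL);
	if (err < 0)
		return err;	/* err == 1 only means the counter wrapped */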
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index 89df0ba8ba5e..781aac4ef274 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie)
void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
{
- fscache_withdraw_volume(volume->vcookie);
cachefiles_set_volume_xattr(volume);
__cachefiles_free_volume(volume);
}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index bcb6173943ee..4dd8a993c60a 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
if (xlen == 0)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen);
if (xlen != tlen) {
- if (xlen < 0)
+ if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(object, file_inode(file), xlen,
cachefiles_trace_getxattr_error);
+ }
if (xlen == -EIO)
cachefiles_io_error_obj(
object,
@@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len);
if (xlen != len) {
if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
cachefiles_trace_getxattr_error);
if (xlen == -EIO)
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index ccdbec388091..40f84d014524 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -31,15 +31,7 @@ static int coda_symlink_filler(struct file *file, struct folio *folio)
cii = ITOC(inode);
error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len);
- if (error)
- goto fail;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-fail:
- folio_set_error(folio);
- folio_unlock(folio);
+ folio_end_read(folio, error == 0);
return error;
}
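This is one of several conversions in this series (befs above, cramfs and efs below) to folio_end_read(), which replaces the separate mark-uptodate/set-error and unlock steps. A sketch of the roughly equivalent open-coded form follows; the real helper flips the uptodate and locked bits in a single atomic operation and wakes any waiters:

	/* Approximate open-coded equivalent of folio_end_read(folio, success): */
	if (success)
		folio_mark_uptodate(folio);
	folio_unlock(folio);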
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 460690ca0174..b84d1747a020 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -811,19 +811,19 @@ out:
static int cramfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
u32 maxblock;
int bytes_filled;
void *pgdata;
+ bool success = false;
maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
bytes_filled = 0;
- pgdata = kmap_local_page(page);
+ pgdata = kmap_local_folio(folio, 0);
- if (page->index < maxblock) {
+ if (folio->index < maxblock) {
struct super_block *sb = inode->i_sb;
- u32 blkptr_offset = OFFSET(inode) + page->index * 4;
+ u32 blkptr_offset = OFFSET(inode) + folio->index * 4;
u32 block_ptr, block_start, block_len;
bool uncompressed, direct;
@@ -844,7 +844,7 @@ static int cramfs_read_folio(struct file *file, struct folio *folio)
if (uncompressed) {
block_len = PAGE_SIZE;
/* if last block: cap to file length */
- if (page->index == maxblock - 1)
+ if (folio->index == maxblock - 1)
block_len =
offset_in_page(inode->i_size);
} else {
@@ -861,7 +861,7 @@ static int cramfs_read_folio(struct file *file, struct folio *folio)
* from the previous block's pointer.
*/
block_start = OFFSET(inode) + maxblock * 4;
- if (page->index)
+ if (folio->index)
block_start = *(u32 *)
cramfs_read(sb, blkptr_offset - 4, 4);
/* Beware... previous ptr might be a direct ptr */
@@ -906,17 +906,12 @@ static int cramfs_read_folio(struct file *file, struct folio *folio)
}
memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
- flush_dcache_page(page);
- kunmap_local(pgdata);
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
+ flush_dcache_folio(folio);
+ success = true;
err:
kunmap_local(pgdata);
- ClearPageUptodate(page);
- SetPageError(page);
- unlock_page(page);
+ folio_end_read(folio, success);
return 0;
}
@@ -1003,4 +998,5 @@ static void __exit exit_cramfs_fs(void)
module_init(init_cramfs_fs)
module_exit(exit_cramfs_fs)
+MODULE_DESCRIPTION("Compressed ROM file system support");
MODULE_LICENSE("GPL");
diff --git a/fs/dcache.c b/fs/dcache.c
index 407095188f83..8bdc278a0205 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,8 @@
#include "internal.h"
#include "mount.h"
+#include <asm/runtime-const.h>
+
/*
* Usage:
* dcache->d_inode->i_lock protects:
@@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init;
static struct hlist_bl_head *dentry_hashtable __ro_after_init;
-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
{
- return dentry_hashtable + (hash >> d_hash_shift);
+ return runtime_const_ptr(dentry_hashtable) +
+ runtime_const_shift_right_32(hashlen, d_hash_shift);
}
#define IN_LOOKUP_SHIFT 10
@@ -355,7 +358,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL;
- if (flags & DCACHE_LRU_LIST)
+ /*
+ * The negative counter only tracks dentries on the LRU. Don't inc if
+ * d_lru is on another list.
+ */
+ if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative);
}
@@ -1548,7 +1555,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
{
struct dentry *dentry;
- WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
+ rwsem_assert_held_write(&sb->s_umount);
dentry = sb->s_root;
sb->s_root = NULL;
@@ -1844,9 +1851,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
spin_lock(&dentry->d_lock);
/*
- * Decrement negative dentry count if it was in the LRU list.
+ * The negative counter only tracks dentries on the LRU. Don't dec if
+ * d_lru is on another list.
*/
- if (dentry->d_flags & DCACHE_LRU_LIST)
+ if ((dentry->d_flags &
+ (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_dec(nr_dentry_negative);
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
raw_write_seqcount_begin(&dentry->d_seq);
@@ -2104,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
unsigned *seqp)
{
u64 hashlen = name->hash_len;
- struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -2171,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
{
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
- struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -3029,28 +3038,25 @@ EXPORT_SYMBOL(d_splice_alias);
bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
- bool result;
+ bool subdir;
unsigned seq;
if (new_dentry == old_dentry)
return true;
- do {
- /* for restarting inner loop in case of seq retry */
- seq = read_seqbegin(&rename_lock);
- /*
- * Need rcu_readlock to protect against the d_parent trashing
- * due to d_move
- */
- rcu_read_lock();
- if (d_ancestor(old_dentry, new_dentry))
- result = true;
- else
- result = false;
- rcu_read_unlock();
- } while (read_seqretry(&rename_lock, seq));
-
- return result;
+ /* Access d_parent under rcu as d_move() may change it. */
+ rcu_read_lock();
+ seq = read_seqbegin(&rename_lock);
+ subdir = d_ancestor(old_dentry, new_dentry);
+ /* Try lockless once... */
+ if (read_seqretry(&rename_lock, seq)) {
+ /* ...else acquire lock for progress even on deep chains. */
+ read_seqlock_excl(&rename_lock);
+ subdir = d_ancestor(old_dentry, new_dentry);
+ read_sequnlock_excl(&rename_lock);
+ }
+ rcu_read_unlock();
+ return subdir;
}
EXPORT_SYMBOL(is_subdir);
@@ -3100,6 +3106,34 @@ void d_tmpfile(struct file *file, struct inode *inode)
}
EXPORT_SYMBOL(d_tmpfile);
+/*
+ * Obtain inode number of the parent dentry.
+ */
+ino_t d_parent_ino(struct dentry *dentry)
+{
+ struct dentry *parent;
+ struct inode *iparent;
+ unsigned seq;
+ ino_t ret;
+
+ scoped_guard(rcu) {
+ seq = raw_seqcount_begin(&dentry->d_seq);
+ parent = READ_ONCE(dentry->d_parent);
+ iparent = d_inode_rcu(parent);
+ if (likely(iparent)) {
+ ret = iparent->i_ino;
+ if (!read_seqcount_retry(&dentry->d_seq, seq))
+ return ret;
+ }
+ }
+
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_parent->d_inode->i_ino;
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+EXPORT_SYMBOL(d_parent_ino);
+
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
@@ -3129,6 +3163,9 @@ static void __init dcache_init_early(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
+
+ runtime_const_init(shift, d_hash_shift);
+ runtime_const_init(ptr, dentry_hashtable);
}
static void __init dcache_init(void)
@@ -3157,6 +3194,9 @@ static void __init dcache_init(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
+
+ runtime_const_init(shift, d_hash_shift);
+ runtime_const_init(ptr, dentry_hashtable);
}
/* SLAB cache for __getname() consumers */
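The d_hash() change relies on the new asm/runtime-const.h mechanism: dentry_hashtable and d_hash_shift are patched directly into the instruction stream by runtime_const_init() once their final values are known at boot. On architectures without support, the generic fallback presumably reduces to plain variable accesses (a sketch of the assumed asm-generic definitions):

	/* Assumed generic fallback: no instruction patching, just ordinary
	 * loads of the variables, preserving the old behaviour. */
	#define runtime_const_ptr(sym)			(sym)
	#define runtime_const_shift_right_32(val, sym)	((u32)(val) >> (sym))
	#define runtime_const_init(type, sym)		do { } while (0)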
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8fd928899a59..91521576f500 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -92,9 +92,9 @@ enum {
};
static const struct fs_parameter_spec debugfs_param_specs[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -102,8 +102,6 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
{
struct debugfs_fs_info *opts = fc->s_fs_info;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, debugfs_param_specs, param, &result);
@@ -120,16 +118,10 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
switch (opt) {
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return invalf(fc, "Unknown uid");
- opts->uid = uid;
+ opts->uid = result.uid;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return invalf(fc, "Unknown gid");
- opts->gid = gid;
+ opts->gid = result.gid;
break;
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
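
fsparam_uid()/fsparam_gid() move the namespace translation and validity check that every filesystem used to open-code into fs_parse() itself, which is why the Opt_uid/Opt_gid cases shrink to a plain assignment. An approximation of the assumed behaviour inside fs_parse() (a sketch, not the exact fs_parser.c source):

    kuid_t uid = make_kuid(current_user_ns(), result->uint_32);
    if (!uid_valid(uid))
            return invalf(fc, "%s: Invalid uid", param->key);
    result->uid = uid;      /* filesystems then read result.uid directly */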
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index bb14462f6d99..a929f1b613be 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -275,8 +275,8 @@ enum {
};
static const struct fs_parameter_spec efivarfs_parameters[] = {
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("gid", Opt_gid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_gid("gid", Opt_gid),
{},
};
@@ -293,14 +293,10 @@ static int efivarfs_parse_param(struct fs_context *fc, struct fs_parameter *para
switch (opt) {
case Opt_uid:
- opts->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(opts->uid))
- return -EINVAL;
+ opts->uid = result.uid;
break;
case Opt_gid:
- opts->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(opts->gid))
- return -EINVAL;
+ opts->gid = result.gid;
break;
default:
return -EINVAL;
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 7844ab24b813..462619e59766 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -311,4 +311,5 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) {
return 0;
}
+MODULE_DESCRIPTION("Extent File System (efs)");
MODULE_LICENSE("GPL");
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 3b03a573cb1a..7749feded722 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -14,10 +14,9 @@
static int efs_symlink_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- char *link = page_address(page);
- struct buffer_head * bh;
- struct inode * inode = page->mapping->host;
+ char *link = folio_address(folio);
+ struct buffer_head *bh;
+ struct inode *inode = folio->mapping->host;
efs_block_t size = inode->i_size;
int err;
@@ -40,12 +39,9 @@ static int efs_symlink_read_folio(struct file *file, struct folio *folio)
brelse(bh);
}
link[size] = '\0';
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
+ err = 0;
fail:
- SetPageError(page);
- unlock_page(page);
+ folio_end_read(folio, err == 0);
return err;
}
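
folio_end_read() replaces the separate uptodate/error/unlock steps with one call that takes a success flag; both the success and failure paths above now funnel through it. Roughly equivalent, as a sketch:

    /* Approximate expansion of folio_end_read(folio, success): */
    if (success)
            folio_mark_uptodate(folio);
    folio_unlock(folio);

The real helper sets the uptodate bit and clears the lock bit in a single atomic operation, and since nothing consumes a per-page error bit here, the SetPageError() call can simply be dropped.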
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index c93bd24d2771..1b91d9513013 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -343,7 +343,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
- memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));
+ super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid));
ret = strscpy(sbi->volume_name, dsb->volume_name,
sizeof(dsb->volume_name));
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 9b248ee5fef2..74d3d7bffcf3 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -711,6 +711,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
err = z_erofs_do_map_blocks(inode, map, flags);
out:
+ if (err)
+ map->m_llen = 0;
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
return err;
}
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 036024bce9f7..b80f612867c2 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -148,7 +148,7 @@ int __init z_erofs_gbuf_init(void)
void z_erofs_gbuf_exit(void)
{
- int i;
+ int i, j;
for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
@@ -161,9 +161,9 @@ void z_erofs_gbuf_exit(void)
if (!gbuf->pages)
continue;
- for (i = 0; i < gbuf->nrpages; ++i)
- if (gbuf->pages[i])
- put_page(gbuf->pages[i]);
+ for (j = 0; j < gbuf->nrpages; ++j)
+ if (gbuf->pages[j])
+ put_page(gbuf->pages[j]);
kfree(gbuf->pages);
gbuf->pages = NULL;
}
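
The zutil.c change fixes a classic reused-index bug: the inner page loop clobbered the outer pool index i, cutting the outer walk short. A standalone illustration of the bug class (plain userspace C):

    #include <stdio.h>

    int main(void)
    {
            int i;

            for (i = 0; i < 3; i++) {
                    /* BUG: reusing i here advances the OUTER loop too;
                     * the fix is a dedicated inner index j. */
                    for (i = 0; i < 2; i++)
                            ;
                    printf("outer iteration sees i=%d\n", i);
            }
            return 0;       /* prints once instead of three times */
    }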
diff --git a/fs/exec.c b/fs/exec.c
index 40073142288f..4dee205452e2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -952,10 +952,6 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
path_noexec(&file->f_path)))
goto exit;
- err = deny_write_access(file);
- if (err)
- goto exit;
-
out:
return file;
@@ -971,8 +967,7 @@ exit:
*
* Returns ERR_PTR on failure or allocated struct file on success.
*
- * As this is a wrapper for the internal do_open_execat(), callers
- * must call allow_write_access() before fput() on release. Also see
+ * This is a wrapper for the internal do_open_execat(). Also see
* do_close_execat().
*/
struct file *open_exec(const char *name)
@@ -1524,10 +1519,8 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
/* Matches do_open_execat() */
static void do_close_execat(struct file *file)
{
- if (!file)
- return;
- allow_write_access(file);
- fput(file);
+ if (file)
+ fput(file);
}
static void free_bprm(struct linux_binprm *bprm)
@@ -1846,7 +1839,6 @@ static int exec_binprm(struct linux_binprm *bprm)
bprm->file = bprm->interpreter;
bprm->interpreter = NULL;
- allow_write_access(exec);
if (unlikely(bprm->have_execfd)) {
if (bprm->executable) {
fput(exec);
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 3d5ea2cfad66..a3c7173ef693 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -225,8 +225,8 @@ static const struct constant_table exfat_param_enums[] = {
};
static const struct fs_parameter_spec exfat_parameters[] = {
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("gid", Opt_gid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_gid("gid", Opt_gid),
fsparam_u32oct("umask", Opt_umask),
fsparam_u32oct("dmask", Opt_dmask),
fsparam_u32oct("fmask", Opt_fmask),
@@ -262,10 +262,10 @@ static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
switch (opt) {
case Opt_uid:
- opts->fs_uid = make_kuid(current_user_ns(), result.uint_32);
+ opts->fs_uid = result.uid;
break;
case Opt_gid:
- opts->fs_gid = make_kgid(current_user_ns(), result.uint_32);
+ opts->fs_gid = result.gid;
break;
case Opt_umask:
opts->fs_fmask = result.uint_32;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 07ea3d62b298..4f2dd4ab4486 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -427,7 +427,7 @@ EXPORT_SYMBOL_GPL(exportfs_encode_fh);
struct dentry *
exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
- int fileid_type,
+ int fileid_type, unsigned int flags,
int (*acceptable)(void *, struct dentry *),
void *context)
{
@@ -445,6 +445,11 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
if (IS_ERR_OR_NULL(result))
return result;
+ if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) {
+ err = -ENOTDIR;
+ goto err_result;
+ }
+
/*
* If no acceptance criteria was specified by caller, a disconnected
* dentry is also acceptable. Callers may use this mode to query if
@@ -581,7 +586,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
{
struct dentry *ret;
- ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
+ ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0,
acceptable, context);
if (IS_ERR_OR_NULL(ret)) {
if (ret == ERR_PTR(-ENOMEM))
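
The new flags argument lets a caller constrain what a file handle may decode to; with EXPORT_FH_DIR_ONLY anything that resolves to a non-directory fails with -ENOTDIR before the acceptability callback is consulted. A hedged caller sketch:

    dentry = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
                                    EXPORT_FH_DIR_ONLY,
                                    acceptable, context);
    if (IS_ERR_OR_NULL(dentry))
            return -ESTALE;  /* or PTR_ERR(dentry), e.g. -ENOTDIR/-ENOMEM */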
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 7ae0b61258a7..0a056d97e640 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -31,11 +31,10 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
ext4_fname_from_fscrypt_name(fname, &name);
-#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
if (err)
ext4_fname_free_filename(fname);
-#endif
+
return err;
}
@@ -51,11 +50,9 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry,
ext4_fname_from_fscrypt_name(fname, &name);
-#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
if (err)
ext4_fname_free_filename(fname);
-#endif
return err;
}
@@ -70,10 +67,7 @@ void ext4_fname_free_filename(struct ext4_filename *fname)
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
-#if IS_ENABLED(CONFIG_UNICODE)
- kfree(fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ ext4_fname_free_ci_filename(fname);
}
static bool uuid_is_zero(__u8 u[16])
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 983dad8c07ec..8007abd4972d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2511,7 +2511,7 @@ struct ext4_filename {
struct fscrypt_str crypto_buf;
#endif
#if IS_ENABLED(CONFIG_UNICODE)
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};
@@ -2745,8 +2745,25 @@ ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
#if IS_ENABLED(CONFIG_UNICODE)
extern int ext4_fname_setup_ci_filename(struct inode *dir,
- const struct qstr *iname,
- struct ext4_filename *fname);
+ const struct qstr *iname,
+ struct ext4_filename *fname);
+
+static inline void ext4_fname_free_ci_filename(struct ext4_filename *fname)
+{
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+}
+#else
+static inline int ext4_fname_setup_ci_filename(struct inode *dir,
+ const struct qstr *iname,
+ struct ext4_filename *fname)
+{
+ return 0;
+}
+
+static inline void ext4_fname_free_ci_filename(struct ext4_filename *fname)
+{
+}
#endif
/* ext4 encryption related stuff goes here crypto.c */
@@ -2769,16 +2786,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
int lookup,
struct ext4_filename *fname)
{
- int err = 0;
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
-#if IS_ENABLED(CONFIG_UNICODE)
- err = ext4_fname_setup_ci_filename(dir, iname, fname);
-#endif
-
- return err;
+ return ext4_fname_setup_ci_filename(dir, iname, fname);
}
static inline int ext4_fname_prepare_lookup(struct inode *dir,
@@ -2790,10 +2802,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
static inline void ext4_fname_free_filename(struct ext4_filename *fname)
{
-#if IS_ENABLED(CONFIG_UNICODE)
- kfree(fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ ext4_fname_free_ci_filename(fname);
}
static inline int ext4_ioctl_get_encryption_pwsalt(struct file *filp,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a630b27a4cc6..e6769b97a970 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1390,62 +1390,11 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
}
#if IS_ENABLED(CONFIG_UNICODE)
-/*
- * Test whether a case-insensitive directory entry matches the filename
- * being searched for. If quick is set, assume the name being looked up
- * is already in the casefolded form.
- *
- * Returns: 0 if the directory entry matches, more than 0 if it
- * doesn't match or less than zero on error.
- */
-static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
- u8 *de_name, size_t de_name_len, bool quick)
-{
- const struct super_block *sb = parent->i_sb;
- const struct unicode_map *um = sb->s_encoding;
- struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
- struct qstr entry = QSTR_INIT(de_name, de_name_len);
- int ret;
-
- if (IS_ENCRYPTED(parent)) {
- const struct fscrypt_str encrypted_name =
- FSTR_INIT(de_name, de_name_len);
-
- decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
- if (!decrypted_name.name)
- return -ENOMEM;
- ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
- &decrypted_name);
- if (ret < 0)
- goto out;
- entry.name = decrypted_name.name;
- entry.len = decrypted_name.len;
- }
-
- if (quick)
- ret = utf8_strncasecmp_folded(um, name, &entry);
- else
- ret = utf8_strncasecmp(um, name, &entry);
- if (ret < 0) {
- /* Handle invalid character sequence as either an error
- * or as an opaque byte sequence.
- */
- if (sb_has_strict_encoding(sb))
- ret = -EINVAL;
- else if (name->len != entry.len)
- ret = 1;
- else
- ret = !!memcmp(name->name, entry.name, entry.len);
- }
-out:
- kfree(decrypted_name.name);
- return ret;
-}
-
int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
struct ext4_filename *name)
{
- struct fscrypt_str *cf_name = &name->cf_name;
+ struct qstr *cf_name = &name->cf_name;
+ unsigned char *buf;
struct dx_hash_info *hinfo = &name->hinfo;
int len;
@@ -1455,18 +1404,18 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
- cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
- if (!cf_name->name)
+ buf = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+ if (!buf)
return -ENOMEM;
- len = utf8_casefold(dir->i_sb->s_encoding,
- iname, cf_name->name,
- EXT4_NAME_LEN);
+ len = utf8_casefold(dir->i_sb->s_encoding, iname, buf, EXT4_NAME_LEN);
if (len <= 0) {
- kfree(cf_name->name);
- cf_name->name = NULL;
+ kfree(buf);
+ buf = NULL;
}
+ cf_name->name = buf;
cf_name->len = (unsigned) len;
+
if (!IS_ENCRYPTED(dir))
return 0;
@@ -1502,22 +1451,29 @@ static bool ext4_match(struct inode *parent,
#if IS_ENABLED(CONFIG_UNICODE)
if (IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
- if (fname->cf_name.name) {
- struct qstr cf = {.name = fname->cf_name.name,
- .len = fname->cf_name.len};
- if (IS_ENCRYPTED(parent)) {
- if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
- fname->hinfo.minor_hash !=
- EXT4_DIRENT_MINOR_HASH(de)) {
-
- return false;
- }
- }
- return !ext4_ci_compare(parent, &cf, de->name,
- de->name_len, true);
- }
- return !ext4_ci_compare(parent, fname->usr_fname, de->name,
- de->name_len, false);
+ /*
+ * Just checking IS_ENCRYPTED(parent) below is not
+ * sufficient to decide whether one can use the hash for
+ * skipping the string comparison, because the key might
+ * have been added right after
+ * ext4_fname_setup_ci_filename(). In this case, a hash
+ * mismatch will be a false negative. Therefore, make
+ * sure cf_name was properly initialized before
+ * considering the calculated hash.
+ */
+ if (IS_ENCRYPTED(parent) && fname->cf_name.name &&
+ (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
+ fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de)))
+ return false;
+ /*
+ * Treat comparison errors as not a match. The only
+ * cases where that happens are disk corruption or
+ * ENOMEM.
+ */
+
+ return generic_ci_match(parent, fname->usr_fname,
+ &fname->cf_name, de->name,
+ de->name_len) > 0;
}
#endif
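
Both ext4 here and f2fs below now delegate to the shared libfs helper generic_ci_match(), which is assumed to keep the contract of the ext4_ci_compare()/f2fs_match_ci_name() code it replaces: positive for a match, zero for a miss, negative on error. Its assumed shape:

    /*
     * Sketch of the assumed prototype: compare the on-disk name
     * (decrypting it if needed) against the pre-casefolded name when one
     * exists, else against the raw user name.
     */
    int generic_ci_match(const struct inode *parent, const struct qstr *name,
                         const struct qstr *folded_name,
                         const u8 *de_name, u32 de_name_len);

Comparing the result with "> 0", as ext4_match() does above, makes both a miss and an error read as "no match".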
@@ -1869,8 +1825,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
}
}
-#if IS_ENABLED(CONFIG_UNICODE)
- if (!inode && IS_CASEFOLDED(dir)) {
+ if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
@@ -1878,7 +1833,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
*/
return NULL;
}
-#endif
+
return d_splice_alias(inode, dentry);
}
@@ -3208,16 +3163,14 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
ext4_fc_track_unlink(handle, dentry);
retval = ext4_mark_inode_dirty(handle, dir);
-#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
end_rmdir:
brelse(bh);
@@ -3319,16 +3272,15 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
goto out_trace;
retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);
-#if IS_ENABLED(CONFIG_UNICODE)
+
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
out_trace:
trace_ext4_unlink_exit(dentry, retval);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c682fb927b64..eb899628e121 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,8 +1721,8 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
fsparam_flag ("bsdgroups", Opt_grpid),
fsparam_flag ("nogrpid", Opt_nogrpid),
fsparam_flag ("sysvgroups", Opt_nogrpid),
- fsparam_u32 ("resgid", Opt_resgid),
- fsparam_u32 ("resuid", Opt_resuid),
+ fsparam_gid ("resgid", Opt_resgid),
+ fsparam_uid ("resuid", Opt_resuid),
fsparam_u32 ("sb", Opt_sb),
fsparam_enum ("errors", Opt_errors, ext4_param_errors),
fsparam_flag ("nouid32", Opt_nouid32),
@@ -2127,8 +2127,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct fs_parse_result result;
const struct mount_opts *m;
int is_remount;
- kuid_t uid;
- kgid_t gid;
int token;
token = fs_parse(fc, ext4_param_specs, param, &result);
@@ -2270,23 +2268,11 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->spec |= EXT4_SPEC_s_stripe;
return 0;
case Opt_resuid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid)) {
- ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
- result.uint_32);
- return -EINVAL;
- }
- ctx->s_resuid = uid;
+ ctx->s_resuid = result.uid;
ctx->spec |= EXT4_SPEC_s_resuid;
return 0;
case Opt_resgid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid)) {
- ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
- result.uint_32);
- return -EINVAL;
- }
- ctx->s_resgid = gid;
+ ctx->s_resgid = result.gid;
ctx->spec |= EXT4_SPEC_s_resgid;
return 0;
case Opt_journal_dev:
@@ -3586,14 +3572,12 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (ext4_has_feature_casefold(sb)) {
+ if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
ext4_msg(sb, KERN_ERR,
"Filesystem with casefold feature cannot be "
"mounted without CONFIG_UNICODE");
return 0;
}
-#endif
if (readonly)
return 1;
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index ec2aeccb69a3..8bffdeccdbc3 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -219,8 +219,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
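
in_group_or_capable() centralizes the setgid-stripping test that f2fs (like other filesystems) used to open-code. Its assumed shape is exactly the pair of checks it replaces:

    /* Sketch mirroring the removed open-coded checks. */
    bool in_group_or_capable(struct mnt_idmap *idmap,
                             const struct inode *inode, vfsgid_t vfsgid)
    {
            if (vfsgid_in_group_p(vfsgid))
                    return true;
            return capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID);
    }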
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 02c9355176d3..cbd7a5e96a37 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -42,35 +42,49 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
+#if IS_ENABLED(CONFIG_UNICODE)
/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
-#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = dir->i_sb;
+ unsigned char *buf;
+ int len;
if (IS_CASEFOLDED(dir) &&
!is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
- fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
- GFP_NOFS, false, F2FS_SB(sb));
- if (!fname->cf_name.name)
+ buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
+ GFP_NOFS, false, F2FS_SB(sb));
+ if (!buf)
return -ENOMEM;
- fname->cf_name.len = utf8_casefold(sb->s_encoding,
- fname->usr_fname,
- fname->cf_name.name,
- F2FS_NAME_LEN);
- if ((int)fname->cf_name.len <= 0) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
+
+ len = utf8_casefold(sb->s_encoding, fname->usr_fname,
+ buf, F2FS_NAME_LEN);
+ if (len <= 0) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
+ return 0;
}
+ fname->cf_name.name = buf;
+ fname->cf_name.len = len;
}
-#endif
+
return 0;
}
+void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+ unsigned char *buf = (unsigned char *)fname->cf_name.name;
+
+ if (buf) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
+ fname->cf_name.name = NULL;
+ }
+}
+#endif /* CONFIG_UNICODE */
+
static int __f2fs_setup_filename(const struct inode *dir,
const struct fscrypt_name *crypt_name,
struct f2fs_filename *fname)
@@ -142,12 +156,7 @@ void f2fs_free_filename(struct f2fs_filename *fname)
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
#endif
-#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
- }
-#endif
+ f2fs_free_casefolded_name(fname);
}
static unsigned long dir_block_index(unsigned int level,
@@ -176,58 +185,6 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
return f2fs_find_target_dentry(&d, fname, max_slots);
}
-#if IS_ENABLED(CONFIG_UNICODE)
-/*
- * Test whether a case-insensitive directory entry matches the filename
- * being searched for.
- *
- * Returns 1 for a match, 0 for no match, and -errno on an error.
- */
-static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
- const u8 *de_name, u32 de_name_len)
-{
- const struct super_block *sb = dir->i_sb;
- const struct unicode_map *um = sb->s_encoding;
- struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
- struct qstr entry = QSTR_INIT(de_name, de_name_len);
- int res;
-
- if (IS_ENCRYPTED(dir)) {
- const struct fscrypt_str encrypted_name =
- FSTR_INIT((u8 *)de_name, de_name_len);
-
- if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
- return -EINVAL;
-
- decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
- if (!decrypted_name.name)
- return -ENOMEM;
- res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
- &decrypted_name);
- if (res < 0)
- goto out;
- entry.name = decrypted_name.name;
- entry.len = decrypted_name.len;
- }
-
- res = utf8_strncasecmp_folded(um, name, &entry);
- /*
- * In strict mode, ignore invalid names. In non-strict mode,
- * fall back to treating them as opaque byte sequences.
- */
- if (res < 0 && !sb_has_strict_encoding(sb)) {
- res = name->len == entry.len &&
- memcmp(name->name, entry.name, name->len) == 0;
- } else {
- /* utf8_strncasecmp_folded returns 0 on match */
- res = (res == 0);
- }
-out:
- kfree(decrypted_name.name);
- return res;
-}
-#endif /* CONFIG_UNICODE */
-
static inline int f2fs_match_name(const struct inode *dir,
const struct f2fs_filename *fname,
const u8 *de_name, u32 de_name_len)
@@ -235,11 +192,11 @@ static inline int f2fs_match_name(const struct inode *dir,
struct fscrypt_name f;
#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
+ if (fname->cf_name.name)
+ return generic_ci_match(dir, fname->usr_fname,
+ &fname->cf_name,
+ de_name, de_name_len);
- return f2fs_match_ci_name(dir, &cf, de_name, de_name_len);
- }
#endif
f.usr_fname = fname->usr_fname;
f.disk_name = fname->disk_name;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1974b6aff397..8a9d910aa552 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -531,7 +531,7 @@ struct f2fs_filename {
* internal operation where usr_fname is also NULL. In all these cases
* we fall back to treating the name as an opaque byte sequence.
*/
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};
@@ -3533,8 +3533,22 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
/*
* dir.c
*/
+#if IS_ENABLED(CONFIG_UNICODE)
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname);
+void f2fs_free_casefolded_name(struct f2fs_filename *fname);
+#else
+static inline int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname)
+{
+ return 0;
+}
+
+static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+}
+#endif /* CONFIG_UNICODE */
+
int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct f2fs_filename *fname);
int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5c0b281a70f3..c1ad9b278c47 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -185,7 +185,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
- *pino = parent_ino(dentry);
+ *pino = d_parent_ino(dentry);
dput(dentry);
return 1;
}
@@ -923,10 +923,8 @@ static void __setattr_copy(struct mnt_idmap *idmap,
inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
- if (!vfsgid_in_group_p(vfsgid) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e54f8c08bda8..1ecde2b45e99 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -576,8 +576,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out_iput;
}
out_splice:
-#if IS_ENABLED(CONFIG_UNICODE)
- if (!inode && IS_CASEFOLDED(dir)) {
+ if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
@@ -586,7 +585,7 @@ out_splice:
trace_f2fs_lookup_end(dir, dentry, ino, err);
return NULL;
}
-#endif
+
new = d_splice_alias(inode, dentry);
trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry,
ino, IS_ERR(new) ? PTR_ERR(new) : err);
@@ -639,16 +638,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
-#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside with returning the
* negative dentries at f2fs_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
+
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
fail:
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 496aee53c38a..8712e264071f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -46,10 +46,6 @@
static struct kmem_cache *fsync_entry_slab;
-#if IS_ENABLED(CONFIG_UNICODE)
-extern struct kmem_cache *f2fs_cf_name_slab;
-#endif
-
bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
{
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
@@ -153,11 +149,8 @@ static int init_recovered_filename(const struct inode *dir,
if (err)
return err;
f2fs_hash_filename(dir, fname);
-#if IS_ENABLED(CONFIG_UNICODE)
/* Case-sensitive match is fine for recovery */
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ f2fs_free_casefolded_name(fname);
} else {
f2fs_hash_filename(dir, fname);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1f1b3647a998..df4cf31f93df 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -321,7 +321,7 @@ struct kmem_cache *f2fs_cf_name_slab;
static int __init f2fs_create_casefold_cache(void)
{
f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name",
- F2FS_NAME_LEN);
+ F2FS_NAME_LEN);
return f2fs_cf_name_slab ? 0 : -ENOMEM;
}
@@ -1326,13 +1326,13 @@ default_check:
return -EINVAL;
}
#endif
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (f2fs_sb_has_casefold(sbi)) {
+
+ if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
-#endif
+
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 66cf4778cf3b..d3e426de5f01 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -7,6 +7,8 @@
#include <linux/hash.h>
#include <linux/ratelimit.h>
#include <linux/msdos_fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
/*
* vfat shortname flags
@@ -51,7 +53,8 @@ struct fat_mount_options {
tz_set:1, /* Filesystem timestamps' offset set */
rodir:1, /* allow ATTR_RO for directory */
discard:1, /* Issue discard requests on deletions */
- dos1xfloppy:1; /* Assume default BPB for DOS 1.x floppies */
+ dos1xfloppy:1, /* Assume default BPB for DOS 1.x floppies */
+ debug:1; /* Not currently used */
};
#define FAT_HASH_BITS 8
@@ -415,12 +418,21 @@ extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos);
extern struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos);
extern int fat_sync_inode(struct inode *inode);
-extern int fat_fill_super(struct super_block *sb, void *data, int silent,
- int isvfat, void (*setup)(struct super_block *));
+extern int fat_fill_super(struct super_block *sb, struct fs_context *fc,
+ void (*setup)(struct super_block *));
extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de);
extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
+
+extern const struct fs_parameter_spec fat_param_spec[];
+int fat_init_fs_context(struct fs_context *fc, bool is_vfat);
+void fat_free_fc(struct fs_context *fc);
+
+int fat_parse_param(struct fs_context *fc, struct fs_parameter *param,
+ bool is_vfat);
+int fat_reconfigure(struct fs_context *fc);
+
static inline unsigned long fat_dir_hash(int logstart)
{
return hash_32(logstart, FAT_HASH_BITS);
diff --git a/fs/fat/fat_test.c b/fs/fat/fat_test.c
index 2dab4ca1d0d8..1f0062659067 100644
--- a/fs/fat/fat_test.c
+++ b/fs/fat/fat_test.c
@@ -193,4 +193,5 @@ static struct kunit_suite fat_test_suite = {
kunit_test_suites(&fat_test_suite);
+MODULE_DESCRIPTION("KUnit tests for FAT filesystems");
MODULE_LICENSE("GPL v2");
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d9e6fbb6f246..19115fd2d2a4 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -16,7 +16,6 @@
#include <linux/mpage.h>
#include <linux/vfs.h>
#include <linux/seq_file.h>
-#include <linux/parser.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
@@ -804,16 +803,17 @@ static void __exit fat_destroy_inodecache(void)
kmem_cache_destroy(fat_inode_cachep);
}
-static int fat_remount(struct super_block *sb, int *flags, char *data)
+int fat_reconfigure(struct fs_context *fc)
{
bool new_rdonly;
+ struct super_block *sb = fc->root->d_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
+ fc->sb_flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
sync_filesystem(sb);
/* make sure we update state on remount. */
- new_rdonly = *flags & SB_RDONLY;
+ new_rdonly = fc->sb_flags & SB_RDONLY;
if (new_rdonly != sb_rdonly(sb)) {
if (new_rdonly)
fat_set_state(sb, 0, 0);
@@ -822,6 +822,7 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
}
return 0;
}
+EXPORT_SYMBOL_GPL(fat_reconfigure);
static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
{
@@ -939,8 +940,6 @@ static const struct super_operations fat_sops = {
.evict_inode = fat_evict_inode,
.put_super = fat_put_super,
.statfs = fat_statfs,
- .remount_fs = fat_remount,
-
.show_options = fat_show_options,
};
@@ -1037,355 +1036,282 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
}
enum {
- Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
- Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
- Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
- Opt_immutable, Opt_dots, Opt_nodots,
- Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
- Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
- Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
- Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
- Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, Opt_dos1xfloppy,
+ Opt_check, Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask,
+ Opt_allow_utime, Opt_codepage, Opt_usefree, Opt_nocase, Opt_quiet,
+ Opt_showexec, Opt_debug, Opt_immutable, Opt_dots, Opt_dotsOK,
+ Opt_charset, Opt_shortname, Opt_utf8, Opt_utf8_bool,
+ Opt_uni_xl, Opt_uni_xl_bool, Opt_nonumtail, Opt_nonumtail_bool,
+ Opt_obsolete, Opt_flush, Opt_tz, Opt_rodir, Opt_errors, Opt_discard,
+ Opt_nfs, Opt_nfs_enum, Opt_time_offset, Opt_dos1xfloppy,
};
-static const match_table_t fat_tokens = {
- {Opt_check_r, "check=relaxed"},
- {Opt_check_s, "check=strict"},
- {Opt_check_n, "check=normal"},
- {Opt_check_r, "check=r"},
- {Opt_check_s, "check=s"},
- {Opt_check_n, "check=n"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_umask, "umask=%o"},
- {Opt_dmask, "dmask=%o"},
- {Opt_fmask, "fmask=%o"},
- {Opt_allow_utime, "allow_utime=%o"},
- {Opt_codepage, "codepage=%u"},
- {Opt_usefree, "usefree"},
- {Opt_nocase, "nocase"},
- {Opt_quiet, "quiet"},
- {Opt_showexec, "showexec"},
- {Opt_debug, "debug"},
- {Opt_immutable, "sys_immutable"},
- {Opt_flush, "flush"},
- {Opt_tz_utc, "tz=UTC"},
- {Opt_time_offset, "time_offset=%d"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_discard, "discard"},
- {Opt_nfs_stale_rw, "nfs"},
- {Opt_nfs_stale_rw, "nfs=stale_rw"},
- {Opt_nfs_nostale_ro, "nfs=nostale_ro"},
- {Opt_dos1xfloppy, "dos1xfloppy"},
- {Opt_obsolete, "conv=binary"},
- {Opt_obsolete, "conv=text"},
- {Opt_obsolete, "conv=auto"},
- {Opt_obsolete, "conv=b"},
- {Opt_obsolete, "conv=t"},
- {Opt_obsolete, "conv=a"},
- {Opt_obsolete, "fat=%u"},
- {Opt_obsolete, "blocksize=%u"},
- {Opt_obsolete, "cvf_format=%20s"},
- {Opt_obsolete, "cvf_options=%100s"},
- {Opt_obsolete, "posix"},
- {Opt_err, NULL},
-};
-static const match_table_t msdos_tokens = {
- {Opt_nodots, "nodots"},
- {Opt_nodots, "dotsOK=no"},
- {Opt_dots, "dots"},
- {Opt_dots, "dotsOK=yes"},
- {Opt_err, NULL}
+static const struct constant_table fat_param_check[] = {
+ {"relaxed", 'r'},
+ {"r", 'r'},
+ {"strict", 's'},
+ {"s", 's'},
+ {"normal", 'n'},
+ {"n", 'n'},
+ {}
};
-static const match_table_t vfat_tokens = {
- {Opt_charset, "iocharset=%s"},
- {Opt_shortname_lower, "shortname=lower"},
- {Opt_shortname_win95, "shortname=win95"},
- {Opt_shortname_winnt, "shortname=winnt"},
- {Opt_shortname_mixed, "shortname=mixed"},
- {Opt_utf8_no, "utf8=0"}, /* 0 or no or false */
- {Opt_utf8_no, "utf8=no"},
- {Opt_utf8_no, "utf8=false"},
- {Opt_utf8_yes, "utf8=1"}, /* empty or 1 or yes or true */
- {Opt_utf8_yes, "utf8=yes"},
- {Opt_utf8_yes, "utf8=true"},
- {Opt_utf8_yes, "utf8"},
- {Opt_uni_xl_no, "uni_xlate=0"}, /* 0 or no or false */
- {Opt_uni_xl_no, "uni_xlate=no"},
- {Opt_uni_xl_no, "uni_xlate=false"},
- {Opt_uni_xl_yes, "uni_xlate=1"}, /* empty or 1 or yes or true */
- {Opt_uni_xl_yes, "uni_xlate=yes"},
- {Opt_uni_xl_yes, "uni_xlate=true"},
- {Opt_uni_xl_yes, "uni_xlate"},
- {Opt_nonumtail_no, "nonumtail=0"}, /* 0 or no or false */
- {Opt_nonumtail_no, "nonumtail=no"},
- {Opt_nonumtail_no, "nonumtail=false"},
- {Opt_nonumtail_yes, "nonumtail=1"}, /* empty or 1 or yes or true */
- {Opt_nonumtail_yes, "nonumtail=yes"},
- {Opt_nonumtail_yes, "nonumtail=true"},
- {Opt_nonumtail_yes, "nonumtail"},
- {Opt_rodir, "rodir"},
- {Opt_err, NULL}
+
+static const struct constant_table fat_param_tz[] = {
+ {"UTC", 0},
+ {}
};
-static int parse_options(struct super_block *sb, char *options, int is_vfat,
- int silent, int *debug, struct fat_mount_options *opts)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
- char *iocharset;
+static const struct constant_table fat_param_errors[] = {
+ {"continue", FAT_ERRORS_CONT},
+ {"panic", FAT_ERRORS_PANIC},
+ {"remount-ro", FAT_ERRORS_RO},
+ {}
+};
- opts->isvfat = is_vfat;
- opts->fs_uid = current_uid();
- opts->fs_gid = current_gid();
- opts->fs_fmask = opts->fs_dmask = current_umask();
- opts->allow_utime = -1;
- opts->codepage = fat_default_codepage;
- fat_reset_iocharset(opts);
- if (is_vfat) {
- opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
- opts->rodir = 0;
- } else {
- opts->shortname = 0;
- opts->rodir = 1;
- }
- opts->name_check = 'n';
- opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
- opts->unicode_xlate = 0;
- opts->numtail = 1;
- opts->usefree = opts->nocase = 0;
- opts->tz_set = 0;
- opts->nfs = 0;
- opts->errors = FAT_ERRORS_RO;
- *debug = 0;
+static const struct constant_table fat_param_nfs[] = {
+ {"stale_rw", FAT_NFS_STALE_RW},
+ {"nostale_ro", FAT_NFS_NOSTALE_RO},
+ {}
+};
- opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+/*
+ * These options are all obsolete, but their arguments are still
+ * validated so that invalid values are rejected; the parsed values
+ * themselves are meaningless.
+ */
+static const struct constant_table fat_param_conv[] = {
+ {"binary", 0},
+ {"text", 0},
+ {"auto", 0},
+ {"b", 0},
+ {"t", 0},
+ {"a", 0},
+ {}
+};
- if (!options)
- goto out;
+/* Core options. See below for vfat and msdos extras */
+const struct fs_parameter_spec fat_param_spec[] = {
+ fsparam_enum ("check", Opt_check, fat_param_check),
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_u32oct ("umask", Opt_umask),
+ fsparam_u32oct ("dmask", Opt_dmask),
+ fsparam_u32oct ("fmask", Opt_fmask),
+ fsparam_u32oct ("allow_utime", Opt_allow_utime),
+ fsparam_u32 ("codepage", Opt_codepage),
+ fsparam_flag ("usefree", Opt_usefree),
+ fsparam_flag ("nocase", Opt_nocase),
+ fsparam_flag ("quiet", Opt_quiet),
+ fsparam_flag ("showexec", Opt_showexec),
+ fsparam_flag ("debug", Opt_debug),
+ fsparam_flag ("sys_immutable", Opt_immutable),
+ fsparam_flag ("flush", Opt_flush),
+ fsparam_enum ("tz", Opt_tz, fat_param_tz),
+ fsparam_s32 ("time_offset", Opt_time_offset),
+ fsparam_enum ("errors", Opt_errors, fat_param_errors),
+ fsparam_flag ("discard", Opt_discard),
+ fsparam_flag ("nfs", Opt_nfs),
+ fsparam_enum ("nfs", Opt_nfs_enum, fat_param_nfs),
+ fsparam_flag ("dos1xfloppy", Opt_dos1xfloppy),
+ __fsparam(fs_param_is_enum, "conv",
+ Opt_obsolete, fs_param_deprecated, fat_param_conv),
+ __fsparam(fs_param_is_u32, "fat",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(fs_param_is_u32, "blocksize",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(fs_param_is_string, "cvf_format",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(fs_param_is_string, "cvf_options",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ __fsparam(NULL, "posix",
+ Opt_obsolete, fs_param_deprecated, NULL),
+ {}
+};
+EXPORT_SYMBOL_GPL(fat_param_spec);
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
+static const struct fs_parameter_spec msdos_param_spec[] = {
+ fsparam_flag_no ("dots", Opt_dots),
+ fsparam_bool ("dotsOK", Opt_dotsOK),
+ {}
+};
- token = match_token(p, fat_tokens, args);
- if (token == Opt_err) {
- if (is_vfat)
- token = match_token(p, vfat_tokens, args);
- else
- token = match_token(p, msdos_tokens, args);
- }
- switch (token) {
- case Opt_check_s:
- opts->name_check = 's';
- break;
- case Opt_check_r:
- opts->name_check = 'r';
- break;
- case Opt_check_n:
- opts->name_check = 'n';
- break;
- case Opt_usefree:
- opts->usefree = 1;
- break;
- case Opt_nocase:
- if (!is_vfat)
- opts->nocase = 1;
- else {
- /* for backward compatibility */
- opts->shortname = VFAT_SFN_DISPLAY_WIN95
- | VFAT_SFN_CREATE_WIN95;
- }
- break;
- case Opt_quiet:
- opts->quiet = 1;
- break;
- case Opt_showexec:
- opts->showexec = 1;
- break;
- case Opt_debug:
- *debug = 1;
- break;
- case Opt_immutable:
- opts->sys_immutable = 1;
- break;
- case Opt_uid:
- if (match_int(&args[0], &option))
- return -EINVAL;
- opts->fs_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(opts->fs_uid))
- return -EINVAL;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return -EINVAL;
- opts->fs_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(opts->fs_gid))
- return -EINVAL;
- break;
- case Opt_umask:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->fs_fmask = opts->fs_dmask = option;
- break;
- case Opt_dmask:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->fs_dmask = option;
- break;
- case Opt_fmask:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->fs_fmask = option;
- break;
- case Opt_allow_utime:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->allow_utime = option & (S_IWGRP | S_IWOTH);
- break;
- case Opt_codepage:
- if (match_int(&args[0], &option))
- return -EINVAL;
- opts->codepage = option;
- break;
- case Opt_flush:
- opts->flush = 1;
- break;
- case Opt_time_offset:
- if (match_int(&args[0], &option))
- return -EINVAL;
- /*
- * GMT+-12 zones may have DST corrections so at least
- * 13 hours difference is needed. Make the limit 24
- * just in case someone invents something unusual.
- */
- if (option < -24 * 60 || option > 24 * 60)
- return -EINVAL;
- opts->tz_set = 1;
- opts->time_offset = option;
- break;
- case Opt_tz_utc:
- opts->tz_set = 1;
- opts->time_offset = 0;
- break;
- case Opt_err_cont:
- opts->errors = FAT_ERRORS_CONT;
- break;
- case Opt_err_panic:
- opts->errors = FAT_ERRORS_PANIC;
- break;
- case Opt_err_ro:
- opts->errors = FAT_ERRORS_RO;
- break;
- case Opt_nfs_stale_rw:
- opts->nfs = FAT_NFS_STALE_RW;
- break;
- case Opt_nfs_nostale_ro:
- opts->nfs = FAT_NFS_NOSTALE_RO;
- break;
- case Opt_dos1xfloppy:
- opts->dos1xfloppy = 1;
- break;
+static const struct constant_table fat_param_shortname[] = {
+ {"lower", VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95},
+ {"win95", VFAT_SFN_DISPLAY_WIN95 | VFAT_SFN_CREATE_WIN95},
+ {"winnt", VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WINNT},
+ {"mixed", VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WIN95},
+ {}
+};
- /* msdos specific */
- case Opt_dots:
- opts->dotsOK = 1;
- break;
- case Opt_nodots:
- opts->dotsOK = 0;
- break;
+static const struct fs_parameter_spec vfat_param_spec[] = {
+ fsparam_string ("iocharset", Opt_charset),
+ fsparam_enum ("shortname", Opt_shortname, fat_param_shortname),
+ fsparam_flag ("utf8", Opt_utf8),
+ fsparam_bool ("utf8", Opt_utf8_bool),
+ fsparam_flag ("uni_xlate", Opt_uni_xl),
+ fsparam_bool ("uni_xlate", Opt_uni_xl_bool),
+ fsparam_flag ("nonumtail", Opt_nonumtail),
+ fsparam_bool ("nonumtail", Opt_nonumtail_bool),
+ fsparam_flag ("rodir", Opt_rodir),
+ {}
+};
- /* vfat specific */
- case Opt_charset:
- fat_reset_iocharset(opts);
- iocharset = match_strdup(&args[0]);
- if (!iocharset)
- return -ENOMEM;
- opts->iocharset = iocharset;
- break;
- case Opt_shortname_lower:
- opts->shortname = VFAT_SFN_DISPLAY_LOWER
- | VFAT_SFN_CREATE_WIN95;
- break;
- case Opt_shortname_win95:
- opts->shortname = VFAT_SFN_DISPLAY_WIN95
- | VFAT_SFN_CREATE_WIN95;
- break;
- case Opt_shortname_winnt:
- opts->shortname = VFAT_SFN_DISPLAY_WINNT
- | VFAT_SFN_CREATE_WINNT;
- break;
- case Opt_shortname_mixed:
- opts->shortname = VFAT_SFN_DISPLAY_WINNT
- | VFAT_SFN_CREATE_WIN95;
- break;
- case Opt_utf8_no: /* 0 or no or false */
- opts->utf8 = 0;
- break;
- case Opt_utf8_yes: /* empty or 1 or yes or true */
- opts->utf8 = 1;
- break;
- case Opt_uni_xl_no: /* 0 or no or false */
- opts->unicode_xlate = 0;
- break;
- case Opt_uni_xl_yes: /* empty or 1 or yes or true */
- opts->unicode_xlate = 1;
- break;
- case Opt_nonumtail_no: /* 0 or no or false */
- opts->numtail = 1; /* negated option */
- break;
- case Opt_nonumtail_yes: /* empty or 1 or yes or true */
- opts->numtail = 0; /* negated option */
- break;
- case Opt_rodir:
- opts->rodir = 1;
- break;
- case Opt_discard:
- opts->discard = 1;
- break;
+int fat_parse_param(struct fs_context *fc, struct fs_parameter *param,
+ bool is_vfat)
+{
+ struct fat_mount_options *opts = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
- /* obsolete mount options */
- case Opt_obsolete:
- fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
- "not supported now", p);
- break;
- /* unknown option */
- default:
- if (!silent) {
- fat_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" "
- "or missing value", p);
- }
- return -EINVAL;
- }
- }
+ /* remount options have traditionally been ignored */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+ return 0;
-out:
- /* UTF-8 doesn't provide FAT semantics */
- if (!strcmp(opts->iocharset, "utf8")) {
- fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
- " for FAT filesystems, filesystem will be "
- "case sensitive!");
+ opt = fs_parse(fc, fat_param_spec, param, &result);
+ /* If option not found in fat_param_spec, try vfat/msdos options */
+ if (opt == -ENOPARAM) {
+ if (is_vfat)
+ opt = fs_parse(fc, vfat_param_spec, param, &result);
+ else
+ opt = fs_parse(fc, msdos_param_spec, param, &result);
}
- /* If user doesn't specify allow_utime, it's initialized from dmask. */
- if (opts->allow_utime == (unsigned short)-1)
- opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
- if (opts->unicode_xlate)
- opts->utf8 = 0;
- if (opts->nfs == FAT_NFS_NOSTALE_RO) {
- sb->s_flags |= SB_RDONLY;
- sb->s_export_op = &fat_export_ops_nostale;
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_check:
+ opts->name_check = result.uint_32;
+ break;
+ case Opt_usefree:
+ opts->usefree = 1;
+ break;
+ case Opt_nocase:
+ if (!is_vfat)
+ opts->nocase = 1;
+ else {
+ /* for backward compatibility */
+ opts->shortname = VFAT_SFN_DISPLAY_WIN95
+ | VFAT_SFN_CREATE_WIN95;
+ }
+ break;
+ case Opt_quiet:
+ opts->quiet = 1;
+ break;
+ case Opt_showexec:
+ opts->showexec = 1;
+ break;
+ case Opt_debug:
+ opts->debug = 1;
+ break;
+ case Opt_immutable:
+ opts->sys_immutable = 1;
+ break;
+ case Opt_uid:
+ opts->fs_uid = result.uid;
+ break;
+ case Opt_gid:
+ opts->fs_gid = result.gid;
+ break;
+ case Opt_umask:
+ opts->fs_fmask = opts->fs_dmask = result.uint_32;
+ break;
+ case Opt_dmask:
+ opts->fs_dmask = result.uint_32;
+ break;
+ case Opt_fmask:
+ opts->fs_fmask = result.uint_32;
+ break;
+ case Opt_allow_utime:
+ opts->allow_utime = result.uint_32 & (S_IWGRP | S_IWOTH);
+ break;
+ case Opt_codepage:
+ opts->codepage = result.uint_32;
+ break;
+ case Opt_flush:
+ opts->flush = 1;
+ break;
+ case Opt_time_offset:
+ /*
+ * GMT+-12 zones may have DST corrections so at least
+ * 13 hours difference is needed. Make the limit 24
+ * just in case someone invents something unusual.
+ */
+ if (result.int_32 < -24 * 60 || result.int_32 > 24 * 60)
+ return -EINVAL;
+ opts->tz_set = 1;
+ opts->time_offset = result.int_32;
+ break;
+ case Opt_tz:
+ opts->tz_set = 1;
+ opts->time_offset = result.uint_32;
+ break;
+ case Opt_errors:
+ opts->errors = result.uint_32;
+ break;
+ case Opt_nfs:
+ opts->nfs = FAT_NFS_STALE_RW;
+ break;
+ case Opt_nfs_enum:
+ opts->nfs = result.uint_32;
+ break;
+ case Opt_dos1xfloppy:
+ opts->dos1xfloppy = 1;
+ break;
+
+ /* msdos specific */
+ case Opt_dots: /* dots / nodots */
+ opts->dotsOK = !result.negated;
+ break;
+ case Opt_dotsOK: /* dotsOK = yes/no */
+ opts->dotsOK = result.boolean;
+ break;
+
+ /* vfat specific */
+ case Opt_charset:
+ fat_reset_iocharset(opts);
+ opts->iocharset = param->string;
+ param->string = NULL; /* Steal string */
+ break;
+ case Opt_shortname:
+ opts->shortname = result.uint_32;
+ break;
+ case Opt_utf8:
+ opts->utf8 = 1;
+ break;
+ case Opt_utf8_bool:
+ opts->utf8 = result.boolean;
+ break;
+ case Opt_uni_xl:
+ opts->unicode_xlate = 1;
+ break;
+ case Opt_uni_xl_bool:
+ opts->unicode_xlate = result.boolean;
+ break;
+ case Opt_nonumtail:
+ opts->numtail = 0; /* negated option */
+ break;
+ case Opt_nonumtail_bool:
+ opts->numtail = !result.boolean; /* negated option */
+ break;
+ case Opt_rodir:
+ opts->rodir = 1;
+ break;
+ case Opt_discard:
+ opts->discard = 1;
+ break;
+
+ /* obsolete mount options */
+ case Opt_obsolete:
+ printk(KERN_INFO "FAT-fs: \"%s\" option is obsolete, "
+ "not supported now\n", param->key);
+ break;
+ default:
+ return -EINVAL;
}
return 0;
}
+EXPORT_SYMBOL_GPL(fat_parse_param);
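
The parser consults the shared FAT table first and falls back to the flavour-specific table only when fs_parse() reports -ENOPARAM, so vfat and msdos share one implementation while keeping their private options. The dispatch shape in isolation (table names hypothetical):

    opt = fs_parse(fc, common_spec, param, &result);
    if (opt == -ENOPARAM)           /* not a shared option... */
            opt = fs_parse(fc, flavour_spec, param, &result);
    if (opt < 0)
            return opt;             /* unknown option or bad value */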
static int fat_read_root(struct inode *inode)
{
@@ -1604,9 +1530,11 @@ out:
/*
* Read the super block of an MS-DOS FS.
*/
-int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
+int fat_fill_super(struct super_block *sb, struct fs_context *fc,
void (*setup)(struct super_block *))
{
+ struct fat_mount_options *opts = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
struct inode *root_inode = NULL, *fat_inode = NULL;
struct inode *fsinfo_inode = NULL;
struct buffer_head *bh;
@@ -1614,7 +1542,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
struct msdos_sb_info *sbi;
u16 logical_sector_size;
u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
- int debug;
long error;
char buf[50];
struct timespec64 ts;
@@ -1643,9 +1570,27 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
- if (error)
- goto out_fail;
+ /* UTF-8 doesn't provide FAT semantics */
+ if (!strcmp(opts->iocharset, "utf8")) {
+ fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
+ " for FAT filesystems, filesystem will be"
+ " case sensitive!");
+ }
+
+ /* If user doesn't specify allow_utime, it's initialized from dmask. */
+ if (opts->allow_utime == (unsigned short)-1)
+ opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
+ if (opts->unicode_xlate)
+ opts->utf8 = 0;
+ if (opts->nfs == FAT_NFS_NOSTALE_RO) {
+ sb->s_flags |= SB_RDONLY;
+ sb->s_export_op = &fat_export_ops_nostale;
+ }
+
+ /* Apply parsed options to sbi (structure copy) */
+ sbi->options = *opts;
+ /* Transfer ownership of iocharset to sbi->options */
+ opts->iocharset = NULL;
setup(sb); /* flavour-specific stuff that needs options */
@@ -1950,6 +1895,57 @@ int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
}
EXPORT_SYMBOL_GPL(fat_flush_inodes);
+int fat_init_fs_context(struct fs_context *fc, bool is_vfat)
+{
+ struct fat_mount_options *opts;
+
+ opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ opts->isvfat = is_vfat;
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
+ opts->fs_fmask = opts->fs_dmask = current_umask();
+ opts->allow_utime = -1;
+ opts->codepage = fat_default_codepage;
+ fat_reset_iocharset(opts);
+ if (is_vfat) {
+ opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
+ opts->rodir = 0;
+ } else {
+ opts->shortname = 0;
+ opts->rodir = 1;
+ }
+ opts->name_check = 'n';
+ opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
+ opts->unicode_xlate = 0;
+ opts->numtail = 1;
+ opts->usefree = opts->nocase = 0;
+ opts->tz_set = 0;
+ opts->nfs = 0;
+ opts->errors = FAT_ERRORS_RO;
+ opts->debug = 0;
+
+ opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+
+ fc->fs_private = opts;
+ /* fc->ops assigned by caller */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fat_init_fs_context);
+
+void fat_free_fc(struct fs_context *fc)
+{
+ struct fat_mount_options *opts = fc->fs_private;
+
+ if (opts->iocharset != fat_default_iocharset)
+ kfree(opts->iocharset);
+ kfree(fc->fs_private);
+}
+EXPORT_SYMBOL_GPL(fat_free_fc);
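
With fat converted to fs_context, options reach fat_parse_param() one at a time, which the new mount API exposes directly to userspace via fsconfig(2). A hedged userspace sketch using raw syscalls (error handling mostly elided; constants from <linux/mount.h>):

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/mount.h>

    int mount_vfat(const char *dev)
    {
            int mntfd, fsfd = syscall(SYS_fsopen, "vfat", 0);

            if (fsfd < 0)
                    return -1;
            /* Each call below lands in vfat_parse_param() in turn. */
            syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "source", dev, 0);
            syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "uid", "1000", 0);
            syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
            mntfd = syscall(SYS_fsmount, fsfd, 0, 0);
            close(fsfd);
            return mntfd;   /* attach somewhere with move_mount(2) */
    }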
+
static int __init init_fat_fs(void)
{
int err;
@@ -1978,4 +1974,5 @@ static void __exit exit_fat_fs(void)
module_init(init_fat_fs)
module_exit(exit_fat_fs)
+MODULE_DESCRIPTION("Core FAT filesystem support");
MODULE_LICENSE("GPL");
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 2116c486843b..f06f6ba643cc 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -650,24 +650,48 @@ static void setup(struct super_block *sb)
sb->s_flags |= SB_NOATIME;
}
-static int msdos_fill_super(struct super_block *sb, void *data, int silent)
+static int msdos_fill_super(struct super_block *sb, struct fs_context *fc)
{
- return fat_fill_super(sb, data, silent, 0, setup);
+ return fat_fill_super(sb, fc, setup);
}
-static struct dentry *msdos_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+static int msdos_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
+ return get_tree_bdev(fc, msdos_fill_super);
+}
+
+static int msdos_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ return fat_parse_param(fc, param, false);
+}
+
+static const struct fs_context_operations msdos_context_ops = {
+ .parse_param = msdos_parse_param,
+ .get_tree = msdos_get_tree,
+ .reconfigure = fat_reconfigure,
+ .free = fat_free_fc,
+};
+
+static int msdos_init_fs_context(struct fs_context *fc)
+{
+ int err;
+
+ /* Initialize with is_vfat == false */
+ err = fat_init_fs_context(fc, false);
+ if (err)
+ return err;
+
+ fc->ops = &msdos_context_ops;
+ return 0;
}
static struct file_system_type msdos_fs_type = {
.owner = THIS_MODULE,
.name = "msdos",
- .mount = msdos_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .init_fs_context = msdos_init_fs_context,
+ .parameters = fat_param_spec,
};
MODULE_ALIAS_FS("msdos");
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c4d00999a433..6423e1dedf14 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1195,24 +1195,48 @@ static void setup(struct super_block *sb)
sb->s_d_op = &vfat_dentry_ops;
}
-static int vfat_fill_super(struct super_block *sb, void *data, int silent)
+static int vfat_fill_super(struct super_block *sb, struct fs_context *fc)
{
- return fat_fill_super(sb, data, silent, 1, setup);
+ return fat_fill_super(sb, fc, setup);
}
-static struct dentry *vfat_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+static int vfat_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
+ return get_tree_bdev(fc, vfat_fill_super);
+}
+
+static int vfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ return fat_parse_param(fc, param, true);
+}
+
+static const struct fs_context_operations vfat_context_ops = {
+ .parse_param = vfat_parse_param,
+ .get_tree = vfat_get_tree,
+ .reconfigure = fat_reconfigure,
+ .free = fat_free_fc,
+};
+
+static int vfat_init_fs_context(struct fs_context *fc)
+{
+ int err;
+
+ /* Initialize with is_vfat == true */
+ err = fat_init_fs_context(fc, true);
+ if (err)
+ return err;
+
+ fc->ops = &vfat_context_ops;
+ return 0;
}
static struct file_system_type vfat_fs_type = {
.owner = THIS_MODULE,
.name = "vfat",
- .mount = vfat_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .init_fs_context = vfat_init_fs_context,
+ .parameters = fat_param_spec,
};
MODULE_ALIAS_FS("vfat");
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 8a7f86c2139a..6e8cea16790e 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -115,88 +115,188 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
return err;
}
-static struct vfsmount *get_vfsmount_from_fd(int fd)
+static int get_path_from_fd(int fd, struct path *root)
{
- struct vfsmount *mnt;
-
if (fd == AT_FDCWD) {
struct fs_struct *fs = current->fs;
spin_lock(&fs->lock);
- mnt = mntget(fs->pwd.mnt);
+ *root = fs->pwd;
+ path_get(root);
spin_unlock(&fs->lock);
} else {
struct fd f = fdget(fd);
if (!f.file)
- return ERR_PTR(-EBADF);
- mnt = mntget(f.file->f_path.mnt);
+ return -EBADF;
+ *root = f.file->f_path;
+ path_get(root);
fdput(f);
}
- return mnt;
+
+ return 0;
}
+enum handle_to_path_flags {
+ HANDLE_CHECK_PERMS = (1 << 0),
+ HANDLE_CHECK_SUBTREE = (1 << 1),
+};
+
+struct handle_to_path_ctx {
+ struct path root;
+ enum handle_to_path_flags flags;
+ unsigned int fh_flags;
+};
+
static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
{
- return 1;
+ struct handle_to_path_ctx *ctx = context;
+ struct user_namespace *user_ns = current_user_ns();
+ struct dentry *d, *root = ctx->root.dentry;
+ struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt);
+ int retval = 0;
+
+ if (!root)
+ return 1;
+
+ /* Old permission model with global CAP_DAC_READ_SEARCH. */
+ if (!ctx->flags)
+ return 1;
+
+ /*
+ * This is racy as we're not taking rename_lock, but we may ignore
+ * permissions here and only need an approximation of whether we were
+ * able to follow a path to the file.
+ *
+ * It's also potentially expensive on some filesystems especially if
+ * there is a deep path.
+ */
+ d = dget(dentry);
+ while (d != root && !IS_ROOT(d)) {
+ struct dentry *parent = dget_parent(d);
+
+ /*
+ * We know that we have the ability to override DAC permissions
+ * as we've verified this earlier via CAP_DAC_READ_SEARCH. But
+ * we also need to make sure that there aren't any unmapped
+ * inodes in the path that would prevent us from reaching the
+ * file.
+ */
+ if (!privileged_wrt_inode_uidgid(user_ns, idmap,
+ d_inode(parent))) {
+ dput(d);
+ dput(parent);
+ return retval;
+ }
+
+ dput(d);
+ d = parent;
+ }
+
+ if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root)
+ retval = 1;
+ WARN_ON_ONCE(d != root && d != root->d_sb->s_root);
+ dput(d);
+ return retval;
}
-static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
- struct path *path)
+static int do_handle_to_path(struct file_handle *handle, struct path *path,
+ struct handle_to_path_ctx *ctx)
{
- int retval = 0;
int handle_dwords;
+ struct vfsmount *mnt = ctx->root.mnt;
- path->mnt = get_vfsmount_from_fd(mountdirfd);
- if (IS_ERR(path->mnt)) {
- retval = PTR_ERR(path->mnt);
- goto out_err;
- }
/* change the handle size to multiple of sizeof(u32) */
handle_dwords = handle->handle_bytes >> 2;
- path->dentry = exportfs_decode_fh(path->mnt,
+ path->dentry = exportfs_decode_fh_raw(mnt,
(struct fid *)handle->f_handle,
handle_dwords, handle->handle_type,
- vfs_dentry_acceptable, NULL);
- if (IS_ERR(path->dentry)) {
- retval = PTR_ERR(path->dentry);
- goto out_mnt;
+ ctx->fh_flags,
+ vfs_dentry_acceptable, ctx);
+ if (IS_ERR_OR_NULL(path->dentry)) {
+ if (path->dentry == ERR_PTR(-ENOMEM))
+ return -ENOMEM;
+ return -ESTALE;
}
+ path->mnt = mntget(mnt);
return 0;
-out_mnt:
- mntput(path->mnt);
-out_err:
- return retval;
+}
+
+/*
+ * Allow relaxed permission checks for file handles if the caller has the
+ * ability to mount the filesystem or create a bind-mount of the
+ * provided @mountdirfd.
+ *
+ * In both cases the caller may have an unobstructed path to the
+ * encoded file. If the caller is only able to create a bind-mount,
+ * we need to verify that there are no locked mounts on top of it
+ * that could prevent us from getting to the encoded file.
+ *
+ * In principle, locked mounts can prevent the caller from mounting the
+ * filesystem, but that only applies to procfs and sysfs, neither of
+ * which supports decoding file handles.
+ */
+static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
+ unsigned int o_flags)
+{
+ struct path *root = &ctx->root;
+
+ /*
+ * Restrict to O_DIRECTORY to provide a deterministic API and avoid
+ * confusion in the face of disconnected non-dir dentries.
+ *
+ * There's only one dentry for each directory inode (VFS rule)...
+ */
+ if (!(o_flags & O_DIRECTORY))
+ return false;
+
+ if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+ ctx->flags = HANDLE_CHECK_PERMS;
+ else if (is_mounted(root->mnt) &&
+ ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
+ CAP_SYS_ADMIN) &&
+ !has_locked_children(real_mount(root->mnt), root->dentry))
+ ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
+ else
+ return false;
+
+ /* Are we able to override DAC permissions? */
+ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
+ return false;
+
+ ctx->fh_flags = EXPORT_FH_DIR_ONLY;
+ return true;
}
static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
- struct path *path)
+ struct path *path, unsigned int o_flags)
{
int retval = 0;
struct file_handle f_handle;
struct file_handle *handle = NULL;
+ struct handle_to_path_ctx ctx = {};
- /*
- * With handle we don't look at the execute bit on the
- * directory. Ideally we would like CAP_DAC_SEARCH.
- * But we don't have that
- */
- if (!capable(CAP_DAC_READ_SEARCH)) {
- retval = -EPERM;
+ retval = get_path_from_fd(mountdirfd, &ctx.root);
+ if (retval)
goto out_err;
+
+ if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
+ retval = -EPERM;
+ goto out_path;
}
+
if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
retval = -EFAULT;
- goto out_err;
+ goto out_path;
}
if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
(f_handle.handle_bytes == 0)) {
retval = -EINVAL;
- goto out_err;
+ goto out_path;
}
handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL);
if (!handle) {
retval = -ENOMEM;
- goto out_err;
+ goto out_path;
}
/* copy the full handle */
*handle = f_handle;
@@ -207,10 +307,12 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
goto out_handle;
}
- retval = do_handle_to_path(mountdirfd, handle, path);
+ retval = do_handle_to_path(handle, path, &ctx);
out_handle:
kfree(handle);
+out_path:
+ path_put(&ctx.root);
out_err:
return retval;
}
@@ -223,7 +325,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
struct file *file;
int fd;
- retval = handle_to_path(mountdirfd, ufh, &path);
+ retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
if (retval)
return retval;
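For context, the relaxed permission model above is exercised from userspace through open_by_handle_at(2). A hedged sketch (hypothetical helper, not from this patch): the mount fd must have been opened with O_DIRECTORY and the handle must identify a directory, otherwise the caller still needs global CAP_DAC_READ_SEARCH:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

/* fh is assumed to come from an earlier name_to_handle_at() call */
static int open_dir_by_handle(int mount_fd, struct file_handle *fh)
{
	int fd = open_by_handle_at(mount_fd, fh, O_RDONLY | O_DIRECTORY);

	if (fd < 0)
		perror("open_by_handle_at");
	return fd;
}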
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index a4d6ca0b8971..24727ec34e5a 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -308,6 +308,40 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
}
EXPORT_SYMBOL(fs_param_is_fd);
+int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ kuid_t uid;
+
+ if (fs_param_is_u32(log, p, param, result) != 0)
+ return fs_param_bad_value(log, param);
+
+ uid = make_kuid(current_user_ns(), result->uint_32);
+ if (!uid_valid(uid))
+ return inval_plog(log, "Invalid uid '%s'", param->string);
+
+ result->uid = uid;
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_uid);
+
+int fs_param_is_gid(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ kgid_t gid;
+
+ if (fs_param_is_u32(log, p, param, result) != 0)
+ return fs_param_bad_value(log, param);
+
+ gid = make_kgid(current_user_ns(), result->uint_32);
+ if (!gid_valid(gid))
+ return inval_plog(log, "Invalid gid '%s'", param->string);
+
+ result->gid = gid;
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_gid);
+
int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p,
struct fs_parameter *param, struct fs_parse_result *result)
{
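A hypothetical consumer of these new typed parsers (illustrative names, not from this patch) declares uid/gid parameters in its spec and consumes the pre-validated result.uid / result.gid in its parse callback:

#include <linux/fs_context.h>
#include <linux/fs_parser.h>

enum { Opt_uid, Opt_gid };

struct examplefs_ctx {
	kuid_t uid;
	kgid_t gid;
};

static const struct fs_parameter_spec examplefs_param_spec[] = {
	fsparam_uid("uid", Opt_uid),
	fsparam_gid("gid", Opt_gid),
	{}
};

static int examplefs_parse_param(struct fs_context *fc,
				 struct fs_parameter *param)
{
	struct examplefs_ctx *ctx = fc->fs_private;
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, examplefs_param_spec, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_uid:
		/* already validated against current_user_ns() */
		ctx->uid = result.uid;
		break;
	case Opt_gid:
		ctx->gid = result.gid;
		break;
	}
	return 0;
}

Note that the helpers only validate the id in the caller's user namespace; as the fuse hunk further down shows, a filesystem may still need its own kuid_has_mapping()/kgid_has_mapping() check against the namespace it will actually use.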
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 6593ae518115..ed2dd000622e 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -220,10 +220,6 @@ static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
if (!mount_capable(fc))
return -EPERM;
- /* require the new mount api */
- if (exclusive && fc->ops == &legacy_fs_context_ops)
- return -EOPNOTSUPP;
-
fc->phase = FS_CONTEXT_CREATING;
fc->exclusive = exclusive;
@@ -411,6 +407,7 @@ SYSCALL_DEFINE5(fsconfig,
case FSCONFIG_SET_PATH:
case FSCONFIG_SET_PATH_EMPTY:
case FSCONFIG_SET_FD:
+ case FSCONFIG_CMD_CREATE_EXCL:
ret = -EOPNOTSUPP;
goto out_f;
}
@@ -451,7 +448,7 @@ SYSCALL_DEFINE5(fsconfig,
fallthrough;
case FSCONFIG_SET_PATH:
param.type = fs_value_is_filename;
- param.name = getname_flags(_value, lookup_flags, NULL);
+ param.name = getname_flags(_value, lookup_flags);
if (IS_ERR(param.name)) {
ret = PTR_ERR(param.name);
goto out_key;
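From userspace, the exclusive-create command fits into the usual new-mount-API sequence. A hedged sketch, assuming a kernel and libc recent enough to expose FSCONFIG_CMD_CREATE_EXCL via <linux/mount.h> and the SYS_fs* syscall numbers:

#define _GNU_SOURCE
#include <linux/mount.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	int fsfd, mntfd;

	fsfd = syscall(SYS_fsopen, "tmpfs", FSOPEN_CLOEXEC);
	if (fsfd < 0) {
		perror("fsopen");
		return 1;
	}

	/*
	 * Request a brand-new superblock; this fails instead of silently
	 * reusing an existing one, and (after this change) reports
	 * EOPNOTSUPP up front for legacy-context filesystems.
	 */
	if (syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE_EXCL,
		    NULL, NULL, 0) < 0) {
		perror("fsconfig(FSCONFIG_CMD_CREATE_EXCL)");
		return 1;
	}

	mntfd = syscall(SYS_fsmount, fsfd, FSMOUNT_CLOEXEC, 0);
	if (mntfd < 0) {
		perror("fsmount");
		return 1;
	}
	return 0;
}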
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index 3d192b80a561..04cfd8fee992 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -146,8 +146,8 @@ int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
* be stripped.
*/
if (fc->posix_acl &&
- !vfsgid_in_group_p(i_gid_into_vfsgid(&nop_mnt_idmap, inode)) &&
- !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID))
+ !in_group_or_capable(&nop_mnt_idmap, inode,
+ i_gid_into_vfsgid(&nop_mnt_idmap, inode)))
extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 99e44ea7d875..d8ab4e93916f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -740,8 +740,8 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
fsparam_string ("source", OPT_SOURCE),
fsparam_u32 ("fd", OPT_FD),
fsparam_u32oct ("rootmode", OPT_ROOTMODE),
- fsparam_u32 ("user_id", OPT_USER_ID),
- fsparam_u32 ("group_id", OPT_GROUP_ID),
+ fsparam_uid ("user_id", OPT_USER_ID),
+ fsparam_gid ("group_id", OPT_GROUP_ID),
fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS),
fsparam_flag ("allow_other", OPT_ALLOW_OTHER),
fsparam_u32 ("max_read", OPT_MAX_READ),
@@ -755,6 +755,8 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
struct fs_parse_result result;
struct fuse_fs_context *ctx = fsc->fs_private;
int opt;
+ kuid_t kuid;
+ kgid_t kgid;
if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
/*
@@ -799,16 +801,26 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
break;
case OPT_USER_ID:
- ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
- if (!uid_valid(ctx->user_id))
+ kuid = result.uid;
+ /*
+ * The requested uid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kuid_has_mapping(fsc->user_ns, kuid))
return invalfc(fsc, "Invalid user_id");
+ ctx->user_id = kuid;
ctx->user_id_present = true;
break;
case OPT_GROUP_ID:
- ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
- if (!gid_valid(ctx->group_id))
+ kgid = result.gid;
+ /*
+ * The requested gid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kgid_has_mapping(fsc->user_ns, kgid))
return invalfc(fsc, "Invalid group_id");
+ ctx->group_id = kgid;
ctx->group_id_present = true;
break;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 8c34798a0715..744e10b46904 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -200,6 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
HFS_I(inode)->fs_blocks = 0;
+ HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60;
if (S_ISDIR(mode)) {
inode->i_size = 2;
HFS_SB(sb)->folder_count++;
@@ -275,6 +276,8 @@ void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
for (count = 0, i = 0; i < 3; i++)
count += be16_to_cpu(ext[i].count);
HFS_I(inode)->first_blocks = count;
+ HFS_I(inode)->cached_start = 0;
+ HFS_I(inode)->cached_blocks = 0;
inode->i_size = HFS_I(inode)->phys_size = log_size;
HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6764afa98a6f..eeac99765f0d 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -28,6 +28,7 @@
static struct kmem_cache *hfs_inode_cachep;
+MODULE_DESCRIPTION("Apple Macintosh file system support");
MODULE_LICENSE("GPL");
static int hfs_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index ca2ba8c9f82e..901e83d65d20 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -25,19 +25,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- switch (tree->cnid) {
- case HFSPLUS_CAT_CNID:
- mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
- break;
- case HFSPLUS_EXT_CNID:
- mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
- break;
- case HFSPLUS_ATTR_CNID:
- mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
- break;
- default:
- BUG();
- }
+ mutex_lock_nested(&tree->tree_lock,
+ hfsplus_btree_lock_class(tree));
return 0;
}
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 3c572e44f2ad..9c51867dddc5 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -430,7 +430,8 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
hfsplus_free_extents(sb, ext_entry, total_blocks - start,
total_blocks);
total_blocks = start;
- mutex_lock(&fd.tree->tree_lock);
+ mutex_lock_nested(&fd.tree->tree_lock,
+ hfsplus_btree_lock_class(fd.tree));
} while (total_blocks > blocks);
hfs_find_exit(&fd);
@@ -592,7 +593,8 @@ void hfsplus_file_truncate(struct inode *inode)
alloc_cnt, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->first_extents);
hip->first_blocks = blk_cnt;
- mutex_lock(&fd.tree->tree_lock);
+ mutex_lock_nested(&fd.tree->tree_lock,
+ hfsplus_btree_lock_class(fd.tree));
break;
}
res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
@@ -606,7 +608,8 @@ void hfsplus_file_truncate(struct inode *inode)
hfsplus_free_extents(sb, hip->cached_extents,
alloc_cnt - start, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->cached_extents);
- mutex_lock(&fd.tree->tree_lock);
+ mutex_lock_nested(&fd.tree->tree_lock,
+ hfsplus_btree_lock_class(fd.tree));
if (blk_cnt > start) {
hip->extent_state |= HFSPLUS_EXT_DIRTY;
break;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 012a3d003fbe..9e78f181c24f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -553,6 +553,27 @@ static inline __be32 __hfsp_ut2mt(time64_t ut)
return cpu_to_be32(lower_32_bits(ut) + HFSPLUS_UTC_OFFSET);
}
+static inline enum hfsplus_btree_mutex_classes
+hfsplus_btree_lock_class(struct hfs_btree *tree)
+{
+ enum hfsplus_btree_mutex_classes class;
+
+ switch (tree->cnid) {
+ case HFSPLUS_CAT_CNID:
+ class = CATALOG_BTREE_MUTEX;
+ break;
+ case HFSPLUS_EXT_CNID:
+ class = EXTENTS_BTREE_MUTEX;
+ break;
+ case HFSPLUS_ATTR_CNID:
+ class = ATTR_BTREE_MUTEX;
+ break;
+ default:
+ BUG();
+ }
+ return class;
+}
+
/* compatibility */
#define hfsp_mt2ut(t) (struct timespec64){ .tv_sec = __hfsp_mt2ut(t) }
#define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec)
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5661a2e24d03..40d04dba13ac 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -40,7 +40,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
/* Directory containing the bootable system */
vh->finder_info[0] = bvh->finder_info[0] =
- cpu_to_be32(parent_ino(dentry));
+ cpu_to_be32(d_parent_ino(dentry));
/*
* Bootloader. Just using the inode here breaks in the case of
@@ -51,7 +51,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
/* Per spec, the OS X system folder - same as finder_info[0] here */
vh->finder_info[5] = bvh->finder_info[5] =
- cpu_to_be32(parent_ino(dentry));
+ cpu_to_be32(d_parent_ino(dentry));
mutex_unlock(&sbi->vh_mutex);
return 0;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 5a400259ae74..9a1a93e3888b 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
- strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
+ strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
if (!strbuf) {
res = -ENOMEM;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a73d27c4dd58..3eb747d26924 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -16,11 +16,16 @@
#include <linux/seq_file.h>
#include <linux/writeback.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/namei.h>
#include "hostfs.h"
#include <init.h>
#include <kern.h>
+struct hostfs_fs_info {
+ char *host_root_path;
+};
+
struct hostfs_inode_info {
int fd;
fmode_t mode;
@@ -90,8 +95,10 @@ static char *__dentry_name(struct dentry *dentry, char *name)
char *p = dentry_path_raw(dentry, name, PATH_MAX);
char *root;
size_t len;
+ struct hostfs_fs_info *fsi;
- root = dentry->d_sb->s_fs_info;
+ fsi = dentry->d_sb->s_fs_info;
+ root = fsi->host_root_path;
len = strlen(root);
if (IS_ERR(p)) {
__putname(name);
@@ -196,8 +203,10 @@ static int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
long long f_bavail;
long long f_files;
long long f_ffree;
+ struct hostfs_fs_info *fsi;
- err = do_statfs(dentry->d_sb->s_fs_info,
+ fsi = dentry->d_sb->s_fs_info;
+ err = do_statfs(fsi->host_root_path,
&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
&sf->f_namelen);
@@ -245,7 +254,11 @@ static void hostfs_free_inode(struct inode *inode)
static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
{
- const char *root_path = root->d_sb->s_fs_info;
+ struct hostfs_fs_info *fsi;
+ const char *root_path;
+
+ fsi = root->d_sb->s_fs_info;
+ root_path = fsi->host_root_path;
size_t offset = strlen(root_ino) + 1;
if (strlen(root_path) > offset)
@@ -432,31 +445,20 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
static int hostfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
char *buffer;
- loff_t start = page_offset(page);
+ loff_t start = folio_pos(folio);
int bytes_read, ret = 0;
- buffer = kmap_local_page(page);
+ buffer = kmap_local_folio(folio, 0);
bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
PAGE_SIZE);
- if (bytes_read < 0) {
- ClearPageUptodate(page);
- SetPageError(page);
+ if (bytes_read < 0)
ret = bytes_read;
- goto out;
- }
-
- memset(buffer + bytes_read, 0, PAGE_SIZE - bytes_read);
-
- ClearPageError(page);
- SetPageUptodate(page);
-
- out:
- flush_dcache_page(page);
+ else
+ buffer = folio_zero_tail(folio, bytes_read, buffer);
kunmap_local(buffer);
- unlock_page(page);
+ folio_end_read(folio, ret == 0);
return ret;
}
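This and several later hunks (hpfs, isofs, mpage) converge on the same idiom: folio_end_read(folio, success) sets the uptodate flag on success and unlocks the folio in a single call, replacing the old SetPageUptodate/SetPageError/unlock_page triples. Schematically (hypothetical helper):

/* end a read: mark the folio uptodate iff err == 0, then unlock it */
static void examplefs_end_read(struct folio *folio, int err)
{
	folio_end_read(folio, err == 0);
}

The per-folio error bit disappears on the read side; write-side errors are reported through mapping_set_error() instead, as the jffs2 hunk below does.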
@@ -922,10 +924,11 @@ static const struct inode_operations hostfs_link_iops = {
.get_link = hostfs_get_link,
};
-static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
+static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct hostfs_fs_info *fsi = sb->s_fs_info;
+ const char *host_root = fc->source;
struct inode *root_inode;
- char *host_root_path, *req_root = d;
int err;
sb->s_blocksize = 1024;
@@ -939,15 +942,15 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
return err;
/* NULL is printed as '(null)' by printf(): avoid that. */
- if (req_root == NULL)
- req_root = "";
+ if (fc->source == NULL)
+ host_root = "";
- sb->s_fs_info = host_root_path =
- kasprintf(GFP_KERNEL, "%s/%s", root_ino, req_root);
- if (host_root_path == NULL)
+ fsi->host_root_path =
+ kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root);
+ if (fsi->host_root_path == NULL)
return -ENOMEM;
- root_inode = hostfs_iget(sb, host_root_path);
+ root_inode = hostfs_iget(sb, fsi->host_root_path);
if (IS_ERR(root_inode))
return PTR_ERR(root_inode);
@@ -955,7 +958,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
char *name;
iput(root_inode);
- name = follow_link(host_root_path);
+ name = follow_link(fsi->host_root_path);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -972,11 +975,38 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
return 0;
}
-static struct dentry *hostfs_read_sb(struct file_system_type *type,
- int flags, const char *dev_name,
- void *data)
+static int hostfs_fc_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, hostfs_fill_super);
+}
+
+static void hostfs_fc_free(struct fs_context *fc)
{
- return mount_nodev(type, flags, data, hostfs_fill_sb_common);
+ struct hostfs_fs_info *fsi = fc->s_fs_info;
+
+ if (!fsi)
+ return;
+
+ kfree(fsi->host_root_path);
+ kfree(fsi);
+}
+
+static const struct fs_context_operations hostfs_context_ops = {
+ .get_tree = hostfs_fc_get_tree,
+ .free = hostfs_fc_free,
+};
+
+static int hostfs_init_fs_context(struct fs_context *fc)
+{
+ struct hostfs_fs_info *fsi;
+
+ fsi = kzalloc(sizeof(*fsi), GFP_KERNEL);
+ if (!fsi)
+ return -ENOMEM;
+
+ fc->s_fs_info = fsi;
+ fc->ops = &hostfs_context_ops;
+ return 0;
}
static void hostfs_kill_sb(struct super_block *s)
@@ -986,11 +1016,11 @@ static void hostfs_kill_sb(struct super_block *s)
}
static struct file_system_type hostfs_type = {
- .owner = THIS_MODULE,
- .name = "hostfs",
- .mount = hostfs_read_sb,
- .kill_sb = hostfs_kill_sb,
- .fs_flags = 0,
+ .owner = THIS_MODULE,
+ .name = "hostfs",
+ .init_fs_context = hostfs_init_fs_context,
+ .kill_sb = hostfs_kill_sb,
+ .fs_flags = 0,
};
MODULE_ALIAS_FS("hostfs");
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 9184b4584b01..d0edf9ed33b6 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -472,9 +472,8 @@ out:
static int hpfs_symlink_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- char *link = page_address(page);
- struct inode *i = page->mapping->host;
+ char *link = folio_address(folio);
+ struct inode *i = folio->mapping->host;
struct fnode *fnode;
struct buffer_head *bh;
int err;
@@ -485,17 +484,9 @@ static int hpfs_symlink_read_folio(struct file *file, struct folio *folio)
goto fail;
err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE);
brelse(bh);
- if (err)
- goto fail;
- hpfs_unlock(i->i_sb);
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
-
fail:
hpfs_unlock(i->i_sb);
- SetPageError(page);
- unlock_page(page);
+ folio_end_read(folio, err == 0);
return err;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 314834a078e9..e73717daa5f9 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -793,4 +793,5 @@ static void __exit exit_hpfs_fs(void)
module_init(init_hpfs_fs)
module_exit(exit_hpfs_fs)
+MODULE_DESCRIPTION("OS/2 HPFS file system support");
MODULE_LICENSE("GPL");
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 412f295acebe..81dab95f67ed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -73,13 +73,13 @@ enum hugetlb_param {
};
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -1376,15 +1376,11 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
switch (opt) {
case Opt_uid:
- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(ctx->uid))
- goto bad_val;
+ ctx->uid = result.uid;
return 0;
case Opt_gid:
- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(ctx->gid))
- goto bad_val;
+ ctx->gid = result.gid;
return 0;
case Opt_mode:
diff --git a/fs/inode.c b/fs/inode.c
index 3a41f83a4ba5..f356fe2ec2b6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -162,6 +162,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
+ inode->i_state = 0;
atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
@@ -231,6 +232,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
if (unlikely(security_inode_alloc(inode)))
return -ENOMEM;
+
this_cpu_inc(nr_inodes);
return 0;
@@ -886,36 +888,45 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
return freed;
}
-static void __wait_on_freeing_inode(struct inode *inode);
+static void __wait_on_freeing_inode(struct inode *inode, bool locked);
/*
* Called with the inode lock held.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
int (*test)(struct inode *, void *),
- void *data)
+ void *data, bool locked)
{
struct inode *inode = NULL;
+ if (locked)
+ lockdep_assert_held(&inode_hash_lock);
+ else
+ lockdep_assert_not_held(&inode_hash_lock);
+
+ rcu_read_lock();
repeat:
- hlist_for_each_entry(inode, head, i_hash) {
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
if (inode->i_sb != sb)
continue;
if (!test(inode, data))
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode);
+ __wait_on_freeing_inode(inode, locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return ERR_PTR(-ESTALE);
}
__iget(inode);
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return inode;
}
+ rcu_read_unlock();
return NULL;
}
@@ -924,29 +935,39 @@ repeat:
* iget_locked for details.
*/
static struct inode *find_inode_fast(struct super_block *sb,
- struct hlist_head *head, unsigned long ino)
+ struct hlist_head *head, unsigned long ino,
+ bool locked)
{
struct inode *inode = NULL;
+ if (locked)
+ lockdep_assert_held(&inode_hash_lock);
+ else
+ lockdep_assert_not_held(&inode_hash_lock);
+
+ rcu_read_lock();
repeat:
- hlist_for_each_entry(inode, head, i_hash) {
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
if (inode->i_ino != ino)
continue;
if (inode->i_sb != sb)
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode);
+ __wait_on_freeing_inode(inode, locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return ERR_PTR(-ESTALE);
}
__iget(inode);
spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return inode;
}
+ rcu_read_unlock();
return NULL;
}
@@ -1004,14 +1025,7 @@ EXPORT_SYMBOL(get_next_ino);
*/
struct inode *new_inode_pseudo(struct super_block *sb)
{
- struct inode *inode = alloc_inode(sb);
-
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_state = 0;
- spin_unlock(&inode->i_lock);
- }
- return inode;
+ return alloc_inode(sb);
}
/**
@@ -1161,7 +1175,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
again:
spin_lock(&inode_hash_lock);
- old = find_inode(inode->i_sb, head, test, data);
+ old = find_inode(inode->i_sb, head, test, data, true);
if (unlikely(old)) {
/*
* Uhhuh, somebody else created the same inode under us.
@@ -1235,7 +1249,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
struct inode *new = alloc_inode(sb);
if (new) {
- new->i_state = 0;
inode = inode_insert5(new, hashval, test, set, data);
if (unlikely(inode != new))
destroy_inode(new);
@@ -1246,6 +1259,47 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
EXPORT_SYMBOL(iget5_locked);
/**
+ * iget5_locked_rcu - obtain an inode from a mounted file system
+ * @sb: super block of file system
+ * @hashval: hash value (usually inode number) to get
+ * @test: callback used for comparisons between inodes
+ * @set: callback used to initialize a new struct inode
+ * @data: opaque data pointer to pass to @test and @set
+ *
+ * This is equivalent to iget5_locked, except the @test callback must
+ * tolerate the inode not being stable, including being mid-teardown.
+ */
+struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
+ int (*test)(struct inode *, void *),
+ int (*set)(struct inode *, void *), void *data)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode *inode, *new;
+
+again:
+ inode = find_inode(sb, head, test, data, false);
+ if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
+ wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
+ return inode;
+ }
+
+ new = alloc_inode(sb);
+ if (new) {
+ inode = inode_insert5(new, hashval, test, set, data);
+ if (unlikely(inode != new))
+ destroy_inode(new);
+ }
+ return inode;
+}
+EXPORT_SYMBOL_GPL(iget5_locked_rcu);
+
+/**
* iget_locked - obtain an inode from a mounted file system
* @sb: super block of file system
* @ino: inode number to get
@@ -1263,9 +1317,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
again:
- spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino);
- spin_unlock(&inode_hash_lock);
+ inode = find_inode_fast(sb, head, ino, false);
if (inode) {
if (IS_ERR(inode))
return NULL;
@@ -1283,7 +1335,7 @@ again:
spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
- old = find_inode_fast(sb, head, ino);
+ old = find_inode_fast(sb, head, ino, true);
if (!old) {
inode->i_ino = ino;
spin_lock(&inode->i_lock);
@@ -1419,7 +1471,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
struct inode *inode;
spin_lock(&inode_hash_lock);
- inode = find_inode(sb, head, test, data);
+ inode = find_inode(sb, head, test, data, true);
spin_unlock(&inode_hash_lock);
return IS_ERR(inode) ? NULL : inode;
@@ -1474,7 +1526,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
struct inode *inode;
again:
spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino);
+ inode = find_inode_fast(sb, head, ino, true);
spin_unlock(&inode_hash_lock);
if (inode) {
@@ -2235,17 +2287,21 @@ EXPORT_SYMBOL(inode_needs_sync);
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
* will DTRT.
*/
-static void __wait_on_freeing_inode(struct inode *inode)
+static void __wait_on_freeing_inode(struct inode *inode, bool locked)
{
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_hash_lock);
+ rcu_read_unlock();
+ if (locked)
+ spin_unlock(&inode_hash_lock);
schedule();
finish_wait(wq, &wait.wq_entry);
- spin_lock(&inode_hash_lock);
+ if (locked)
+ spin_lock(&inode_hash_lock);
+ rcu_read_lock();
}
static __initdata unsigned long ihash_entries;
@@ -2538,6 +2594,7 @@ bool in_group_or_capable(struct mnt_idmap *idmap,
return true;
return false;
}
+EXPORT_SYMBOL(in_group_or_capable);
/**
* mode_strip_sgid - handle the sgid bit for non-directories
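A hypothetical caller of the new iget5_locked_rcu() (invented names, not from this patch): since find_inode() now walks the hash chain under rcu_read_lock(), the @test callback can run against an inode that is mid-teardown and must only read fields that stay valid for RCU readers:

#include <linux/fs.h>

struct examplefs_iget_args {
	u64 ino;
};

static int examplefs_iget_test(struct inode *inode, void *data)
{
	struct examplefs_iget_args *args = data;

	/* compare only a field that is stable for RCU walkers */
	return inode->i_ino == args->ino;
}

static int examplefs_iget_set(struct inode *inode, void *data)
{
	struct examplefs_iget_args *args = data;

	inode->i_ino = args->ino;
	return 0;
}

static struct inode *examplefs_iget(struct super_block *sb, u64 ino)
{
	struct examplefs_iget_args args = { .ino = ino };

	return iget5_locked_rcu(sb, ino, examplefs_iget_test,
				examplefs_iget_set, &args);
}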
diff --git a/fs/internal.h b/fs/internal.h
index ab2225136f60..cdd73209eecb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -17,6 +17,7 @@ struct fs_context;
struct pipe_inode_info;
struct iov_iter;
struct mnt_idmap;
+struct ns_common;
/*
* block/bdev.c
@@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern const struct dentry_operations ns_dentry_operations;
+int open_namespace(struct ns_common *ns);
/*
* fs/stat.c:
@@ -247,6 +249,8 @@ extern const struct dentry_operations ns_dentry_operations;
int getname_statx_lookup_flags(int flags);
int do_statx(int dfd, struct filename *filename, unsigned int flags,
unsigned int mask, struct statx __user *buffer);
+int do_statx_fd(int fd, unsigned int flags, unsigned int mask,
+ struct statx __user *buffer);
/*
* fs/splice.c:
@@ -321,3 +325,15 @@ struct stashed_operations {
int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
struct path *path);
void stashed_dentry_prune(struct dentry *dentry);
+/**
+ * path_mounted - check whether path is mounted
+ * @path: path to check
+ *
+ * Determine whether @path refers to the root of a mount.
+ *
+ * Return: true if @path is the root of a mount, false if not.
+ */
+static inline bool path_mounted(const struct path *path)
+{
+ return path->mnt->mnt_root == path->dentry;
+}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d46558990279..f420c53d86ac 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -307,8 +307,6 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off,
spin_unlock_irqrestore(&ifs->state_lock, flags);
}
- if (error)
- folio_set_error(folio);
if (finished)
folio_end_read(folio, uptodate);
}
@@ -444,6 +442,24 @@ done:
return pos - orig_pos + plen;
}
+static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
+ struct iomap_readpage_ctx *ctx)
+{
+ struct folio *folio = ctx->cur_folio;
+ size_t offset = offset_in_folio(folio, iter->pos);
+ loff_t length = min_t(loff_t, folio_size(folio) - offset,
+ iomap_length(iter));
+ loff_t done, ret;
+
+ for (done = 0; done < length; done += ret) {
+ ret = iomap_readpage_iter(iter, ctx, done);
+ if (ret <= 0)
+ return ret;
+ }
+
+ return done;
+}
+
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
{
struct iomap_iter iter = {
@@ -459,10 +475,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
trace_iomap_readpage(iter.inode, 1);
while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.processed = iomap_readpage_iter(&iter, &ctx, 0);
-
- if (ret < 0)
- folio_set_error(folio);
+ iter.processed = iomap_read_folio_iter(&iter, &ctx);
if (ctx.bio) {
submit_bio(ctx.bio);
@@ -698,7 +711,6 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
if (folio_test_uptodate(folio))
return 0;
- folio_clear_error(folio);
do {
iomap_adjust_read_range(iter->inode, folio, &block_start,
@@ -878,37 +890,22 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t copied, struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- loff_t old_size = iter->inode->i_size;
- size_t written;
if (srcmap->type == IOMAP_INLINE) {
iomap_write_end_inline(iter, folio, pos, copied);
- written = copied;
- } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
- written = block_write_end(NULL, iter->inode->i_mapping, pos,
- len, copied, &folio->page, NULL);
- WARN_ON_ONCE(written != copied && written != 0);
- } else {
- written = __iomap_write_end(iter->inode, pos, len, copied,
- folio) ? copied : 0;
+ return true;
}
- /*
- * Update the in-memory inode size after copying the data into the page
- * cache. It's up to the file system to write the updated size to disk,
- * preferably after I/O completion so that no stale data is exposed.
- * Only once that's done can we unlock and release the folio.
- */
- if (pos + written > old_size) {
- i_size_write(iter->inode, pos + written);
- iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
- }
- __iomap_put_folio(iter, pos, written, folio);
+ if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
+ size_t bh_written;
- if (old_size < pos)
- pagecache_isize_extended(iter->inode, old_size, pos);
+ bh_written = block_write_end(NULL, iter->inode->i_mapping, pos,
+ len, copied, &folio->page, NULL);
+ WARN_ON_ONCE(bh_written != copied && bh_written != 0);
+ return bh_written == copied;
+ }
- return written == copied;
+ return __iomap_write_end(iter->inode, pos, len, copied, folio);
}
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
@@ -923,6 +920,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
do {
struct folio *folio;
+ loff_t old_size;
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
@@ -974,6 +972,23 @@ retry:
written = iomap_write_end(iter, pos, bytes, copied, folio) ?
copied : 0;
+ /*
+ * Update the in-memory inode size after copying the data into
+ * the page cache. It's up to the file system to write the
+ * updated size to disk, preferably after I/O completion so that
+ * no stale data is exposed. Only once that's done can we
+ * unlock and release the folio.
+ */
+ old_size = iter->inode->i_size;
+ if (pos + written > old_size) {
+ i_size_write(iter->inode, pos + written);
+ iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
+ }
+ __iomap_put_folio(iter, pos, written, folio);
+
+ if (old_size < pos)
+ pagecache_isize_extended(iter->inode, old_size, pos);
+
cond_resched();
if (unlikely(written == 0)) {
/*
@@ -1344,6 +1359,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
bytes = folio_size(folio) - offset;
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
+ __iomap_put_folio(iter, pos, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@@ -1409,6 +1425,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
folio_mark_accessed(folio);
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
+ __iomap_put_folio(iter, pos, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@@ -1539,8 +1556,6 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
/* walk all folios in bio, ending page IO on them */
bio_for_each_folio_all(fi, bio) {
- if (error)
- folio_set_error(fi.folio);
iomap_finish_folio_write(inode, fi.folio, fi.length);
folio_count++;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 93b1077a380a..ed548efdd9bb 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -326,8 +326,8 @@ static const struct fs_parameter_spec isofs_param_spec[] = {
fsparam_u32 ("session", Opt_session),
fsparam_u32 ("sbsector", Opt_sb),
fsparam_enum ("check", Opt_check, isofs_param_check),
- fsparam_u32 ("uid", Opt_uid),
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
/* Note: mode/dmode historically accepted %u not strictly %o */
fsparam_u32 ("mode", Opt_mode),
fsparam_u32 ("dmode", Opt_dmode),
@@ -344,8 +344,6 @@ static int isofs_parse_param(struct fs_context *fc,
struct isofs_options *popt = fc->fs_private;
struct fs_parse_result result;
int opt;
- kuid_t uid;
- kgid_t gid;
unsigned int n;
/* There are no remountable options */
@@ -409,17 +407,11 @@ static int isofs_parse_param(struct fs_context *fc,
case Opt_ignore:
break;
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return -EINVAL;
- popt->uid = uid;
+ popt->uid = result.uid;
popt->uid_set = 1;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return -EINVAL;
- popt->gid = gid;
+ popt->gid = result.gid;
popt->gid_set = 1;
break;
case Opt_mode:
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index d6c17ad69dee..dbf911126e61 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -688,11 +688,10 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
*/
static int rock_ridge_symlink_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct iso_inode_info *ei = ISOFS_I(inode);
struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
- char *link = page_address(page);
+ char *link = folio_address(folio);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
struct buffer_head *bh;
char *rpnt = link;
@@ -779,9 +778,10 @@ repeat:
goto fail;
brelse(bh);
*rpnt = '\0';
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
+ ret = 0;
+end:
+ folio_end_read(folio, ret == 0);
+ return ret;
/* error exit from macro */
out:
@@ -795,9 +795,8 @@ out_bad_span:
fail:
brelse(bh);
error:
- SetPageError(page);
- unlock_page(page);
- return -EIO;
+ ret = -EIO;
+ goto end;
}
const struct address_space_operations isofs_symlink_aops = {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 62ea76da7fdf..e12cb145147e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -95,13 +95,8 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT,
PAGE_SIZE);
- if (ret) {
- ClearPageUptodate(pg);
- SetPageError(pg);
- } else {
+ if (!ret)
SetPageUptodate(pg);
- ClearPageError(pg);
- }
flush_dcache_page(pg);
kunmap(pg);
@@ -304,10 +299,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
kunmap(pg);
- if (ret) {
- /* There was an error writing. */
- SetPageError(pg);
- }
+ if (ret)
+ mapping_set_error(mapping, ret);
/* Adjust writtenlen for the padding we did, so we don't confuse our caller */
writtenlen -= min(writtenlen, (start - aligned_start));
@@ -330,7 +323,6 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
it gets reread */
jffs2_dbg(1, "%s(): Not all bytes written. Marking page !uptodate\n",
__func__);
- SetPageError(pg);
ClearPageUptodate(pg);
}
diff --git a/fs/libfs.c b/fs/libfs.c
index b635ee5adbcc..8aa34870449f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1854,6 +1854,80 @@ static const struct dentry_operations generic_ci_dentry_ops = {
.d_revalidate = fscrypt_d_revalidate,
#endif
};
+
+/**
+ * generic_ci_match() - Match a name (case-insensitively) with a dirent.
+ * This is a filesystem helper for comparison with directory entries.
+ * generic_ci_d_compare should be used in VFS' ->d_compare instead.
+ *
+ * @parent: Inode of the parent of the dirent under comparison
+ * @name: name under lookup.
+ * @folded_name: Optional pre-folded name under lookup
+ * @de_name: Dirent name.
+ * @de_name_len: dirent name length.
+ *
+ * Test whether a case-insensitive directory entry matches the filename
+ * being searched. If @folded_name is provided, it is used instead of
+ * recalculating the casefold of @name.
+ *
+ * Return: > 0 if the directory entry matches, 0 if it doesn't match, or
+ * < 0 on error.
+ */
+int generic_ci_match(const struct inode *parent,
+ const struct qstr *name,
+ const struct qstr *folded_name,
+ const u8 *de_name, u32 de_name_len)
+{
+ const struct super_block *sb = parent->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
+ struct qstr dirent = QSTR_INIT(de_name, de_name_len);
+ int res = 0;
+
+ if (IS_ENCRYPTED(parent)) {
+ const struct fscrypt_str encrypted_name =
+ FSTR_INIT((u8 *) de_name, de_name_len);
+
+ if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
+ return -EINVAL;
+
+ decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+ if (!decrypted_name.name)
+ return -ENOMEM;
+ res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
+ &decrypted_name);
+ if (res < 0) {
+ kfree(decrypted_name.name);
+ return res;
+ }
+ dirent.name = decrypted_name.name;
+ dirent.len = decrypted_name.len;
+ }
+
+ /*
+ * Attempt a case-sensitive match first. It is cheaper and
+ * should cover most lookups, including all the sane
+ * applications that expect a case-sensitive filesystem.
+ */
+
+ if (dirent.len == name->len &&
+ !memcmp(name->name, dirent.name, dirent.len))
+ goto out;
+
+ if (folded_name->name)
+ res = utf8_strncasecmp_folded(um, folded_name, &dirent);
+ else
+ res = utf8_strncasecmp(um, name, &dirent);
+
+out:
+ kfree(decrypted_name.name);
+ if (res < 0 && sb_has_strict_encoding(sb)) {
+ pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
+ return 0;
+ }
+ return !res;
+}
+EXPORT_SYMBOL(generic_ci_match);
#endif
#ifdef CONFIG_FS_ENCRYPTION
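A filesystem-side caller of the new generic_ci_match() might look like this sketch (hypothetical, not from the patch). The return convention is the one documented above: > 0 match, 0 no match, < 0 error, so errors are propagated rather than folded into "no match":

static int examplefs_find_entry(struct inode *dir, const struct qstr *name,
				const struct qstr *folded,
				const u8 *de_name, u32 de_len, bool *found)
{
	int ret = generic_ci_match(dir, name, folded, de_name, de_len);

	if (ret < 0)
		return ret;	/* e.g. -ENOMEM while decrypting the name */
	*found = ret > 0;
	return 0;
}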
diff --git a/fs/locks.c b/fs/locks.c
index 90c8746874de..bdd94c32256f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1367,9 +1367,9 @@ retry:
locks_wake_up_blocks(&left->c);
}
out:
+ trace_posix_lock_inode(inode, request, error);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
- trace_posix_lock_inode(inode, request, error);
/*
* Free any unused locks.
*/
@@ -2448,8 +2448,9 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by releasing the
- * lock that was just acquired. There is no need to do that when we're
+ * Detect close/fcntl races and recover by zapping all POSIX locks
+ * associated with this file and our files_struct, just like on
+ * filp_flush(). There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
if (!error && file_lock->c.flc_type != F_UNLCK &&
@@ -2464,9 +2465,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
- file_lock->c.flc_type = F_UNLCK;
- error = do_lock_file_wait(filp, cmd, file_lock);
- WARN_ON_ONCE(error);
+ locks_remove_posix(filp, files);
error = -EBADF;
}
}
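As the updated comment notes, the race only exists for classic POSIX locks. A small userspace sketch of the alternative: OFD locks (F_OFD_SETLK) are owned by the open file description rather than the process, so closing a duplicate fd does not drop them and the zap-on-race fallback above never applies:

#define _GNU_SOURCE
#include <fcntl.h>

static int lock_whole_file_ofd(int fd)
{
	struct flock fl = {
		.l_type		= F_WRLCK,
		.l_whence	= SEEK_SET,
		/* .l_start = 0, .l_len = 0 => lock the whole file */
	};

	return fcntl(fd, F_OFD_SETLK, &fl);
}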
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 7f9a2d8aa420..1c3df63162ef 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -730,5 +730,6 @@ static void __exit exit_minix_fs(void)
module_init(init_minix_fs)
module_exit(exit_minix_fs)
+MODULE_DESCRIPTION("Minix file system");
MODULE_LICENSE("GPL");
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index d6031acc34f0..a944a0f17b53 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap,
if (!new_de)
goto out_dir;
err = minix_set_link(new_de, new_page, old_inode);
- kunmap(new_page);
- put_page(new_page);
+ unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
inode_set_ctime_current(new_inode);
diff --git a/fs/mount.h b/fs/mount.h
index 4a42fc68f4cc..ad4b1ddebb54 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
+ struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
+ refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
struct mnt_pcp {
@@ -152,3 +154,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
}
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+bool has_locked_children(struct mount *mnt, struct dentry *dentry);
diff --git a/fs/mpage.c b/fs/mpage.c
index fa8b99a199fa..b5b5ddf9d513 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -48,13 +48,8 @@ static void mpage_read_end_io(struct bio *bio)
struct folio_iter fi;
int err = blk_status_to_errno(bio->bi_status);
- bio_for_each_folio_all(fi, bio) {
- if (err)
- folio_set_error(fi.folio);
- else
- folio_mark_uptodate(fi.folio);
- folio_unlock(fi.folio);
- }
+ bio_for_each_folio_all(fi, bio)
+ folio_end_read(fi.folio, err == 0);
bio_put(bio);
}
@@ -65,10 +60,8 @@ static void mpage_write_end_io(struct bio *bio)
int err = blk_status_to_errno(bio->bi_status);
bio_for_each_folio_all(fi, bio) {
- if (err) {
- folio_set_error(fi.folio);
+ if (err)
mapping_set_error(fi.folio->mapping, err);
- }
folio_end_writeback(fi.folio);
}
diff --git a/fs/namei.c b/fs/namei.c
index 37fb0a8aa09a..3a4c40e12f78 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -126,7 +126,7 @@
#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
struct filename *
-getname_flags(const char __user *filename, int flags, int *empty)
+getname_flags(const char __user *filename, int flags)
{
struct filename *result;
char *kname;
@@ -148,9 +148,20 @@ getname_flags(const char __user *filename, int flags, int *empty)
result->name = kname;
len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
- if (unlikely(len < 0)) {
- __putname(result);
- return ERR_PTR(len);
+ /*
+ * Handle both empty path and copy failure in one go.
+ */
+ if (unlikely(len <= 0)) {
+ if (unlikely(len < 0)) {
+ __putname(result);
+ return ERR_PTR(len);
+ }
+
+ /* The empty path is special. */
+ if (!(flags & LOOKUP_EMPTY)) {
+ __putname(result);
+ return ERR_PTR(-ENOENT);
+ }
}
/*
@@ -180,6 +191,12 @@ getname_flags(const char __user *filename, int flags, int *empty)
kfree(result);
return ERR_PTR(len);
}
+ /* The empty path is special. */
+ if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) {
+ __putname(kname);
+ kfree(result);
+ return ERR_PTR(-ENOENT);
+ }
if (unlikely(len == PATH_MAX)) {
__putname(kname);
kfree(result);
@@ -188,16 +205,6 @@ getname_flags(const char __user *filename, int flags, int *empty)
}
atomic_set(&result->refcnt, 1);
- /* The empty path is special. */
- if (unlikely(!len)) {
- if (empty)
- *empty = 1;
- if (!(flags & LOOKUP_EMPTY)) {
- putname(result);
- return ERR_PTR(-ENOENT);
- }
- }
-
result->uptr = filename;
result->aname = NULL;
audit_getname(result);
@@ -209,13 +216,13 @@ getname_uflags(const char __user *filename, int uflags)
{
int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
- return getname_flags(filename, flags, NULL);
+ return getname_flags(filename, flags);
}
struct filename *
getname(const char __user * filename)
{
- return getname_flags(filename, 0, NULL);
+ return getname_flags(filename, 0);
}
struct filename *
@@ -1233,29 +1240,48 @@ int may_linkat(struct mnt_idmap *idmap, const struct path *link)
*
* Returns 0 if the open is allowed, -ve on error.
*/
-static int may_create_in_sticky(struct mnt_idmap *idmap,
- struct nameidata *nd, struct inode *const inode)
+static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd,
+ struct inode *const inode)
{
umode_t dir_mode = nd->dir_mode;
- vfsuid_t dir_vfsuid = nd->dir_vfsuid;
+ vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid;
- if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
- (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
- likely(!(dir_mode & S_ISVTX)) ||
- vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) ||
- vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
+ if (likely(!(dir_mode & S_ISVTX)))
return 0;
- if (likely(dir_mode & 0002) ||
- (dir_mode & 0020 &&
- ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
- (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
- const char *operation = S_ISFIFO(inode->i_mode) ?
- "sticky_create_fifo" :
- "sticky_create_regular";
- audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
+ if (S_ISREG(inode->i_mode) && !sysctl_protected_regular)
+ return 0;
+
+ if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos)
+ return 0;
+
+ i_vfsuid = i_uid_into_vfsuid(idmap, inode);
+
+ if (vfsuid_eq(i_vfsuid, dir_vfsuid))
+ return 0;
+
+ if (vfsuid_eq_kuid(i_vfsuid, current_fsuid()))
+ return 0;
+
+ if (likely(dir_mode & 0002)) {
+ audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create");
return -EACCES;
}
+
+ if (dir_mode & 0020) {
+ if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) {
+ audit_log_path_denied(AUDIT_ANOM_CREAT,
+ "sticky_create_fifo");
+ return -EACCES;
+ }
+
+ if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) {
+ audit_log_path_denied(AUDIT_ANOM_CREAT,
+ "sticky_create_regular");
+ return -EACCES;
+ }
+ }
+
return 0;
}
@@ -1712,17 +1738,26 @@ static struct dentry *lookup_slow(const struct qstr *name,
}
static inline int may_lookup(struct mnt_idmap *idmap,
- struct nameidata *nd)
+ struct nameidata *restrict nd)
{
- if (nd->flags & LOOKUP_RCU) {
- int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
- if (!err) // success, keep going
- return 0;
- if (!try_to_unlazy(nd))
- return -ECHILD; // redo it all non-lazy
- if (err != -ECHILD) // hard error
- return err;
- }
+ int err, mask;
+
+ mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0;
+ err = inode_permission(idmap, nd->inode, mask | MAY_EXEC);
+ if (likely(!err))
+ return 0;
+
+ // If we failed, and we weren't in LOOKUP_RCU, it's final
+ if (!(nd->flags & LOOKUP_RCU))
+ return err;
+
+ // Drop out of RCU mode to make sure it wasn't transient
+ if (!try_to_unlazy(nd))
+ return -ECHILD; // redo it all non-lazy
+
+ if (err != -ECHILD) // hard error
+ return err;
+
return inode_permission(idmap, nd->inode, MAY_EXEC);
}
@@ -2163,21 +2198,39 @@ EXPORT_SYMBOL(hashlen_string);
/*
* Calculate the length and hash of the path component, and
- * return the "hash_len" as the result.
+ * return the length as the result.
*/
-static inline u64 hash_name(const void *salt, const char *name)
+static inline const char *hash_name(struct nameidata *nd,
+ const char *name,
+ unsigned long *lastword)
{
- unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
+ unsigned long a, b, x, y = (unsigned long)nd->path.dentry;
unsigned long adata, bdata, mask, len;
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
- len = 0;
- goto inside;
+ /*
+ * The first iteration is special, because it can result in
+ * '.' and '..' and has no mixing other than the final fold.
+ */
+ a = load_unaligned_zeropad(name);
+ b = a ^ REPEAT_BYTE('/');
+ if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) {
+ adata = prep_zero_mask(a, adata, &constants);
+ bdata = prep_zero_mask(b, bdata, &constants);
+ mask = create_zero_mask(adata | bdata);
+ a &= zero_bytemask(mask);
+ *lastword = a;
+ len = find_zero(mask);
+ nd->last.hash = fold_hash(a, y);
+ nd->last.len = len;
+ return name + len;
+ }
+ len = 0;
+ x = 0;
do {
HASH_MIX(x, y, a);
len += sizeof(unsigned long);
-inside:
a = load_unaligned_zeropad(name+len);
b = a ^ REPEAT_BYTE('/');
} while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
@@ -2185,11 +2238,25 @@ inside:
adata = prep_zero_mask(a, adata, &constants);
bdata = prep_zero_mask(b, bdata, &constants);
mask = create_zero_mask(adata | bdata);
- x ^= a & zero_bytemask(mask);
+ a &= zero_bytemask(mask);
+ x ^= a;
+ len += find_zero(mask);
+ *lastword = 0; // Multi-word components cannot be DOT or DOTDOT
- return hashlen_create(fold_hash(x, y), len + find_zero(mask));
+ nd->last.hash = fold_hash(x, y);
+ nd->last.len = len;
+ return name + len;
}
+/*
+ * Note that the 'last' word is always zero-masked, but
+ * was loaded as a possibly big-endian word.
+ */
+#ifdef __BIG_ENDIAN
+ #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8))
+ #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16))
+#endif
+
#else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
/* Return the hash of a string of known length */
@@ -2222,22 +2289,35 @@ EXPORT_SYMBOL(hashlen_string);
* We know there's a real path component here of at least
* one character.
*/
-static inline u64 hash_name(const void *salt, const char *name)
+static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword)
{
- unsigned long hash = init_name_hash(salt);
- unsigned long len = 0, c;
+ unsigned long hash = init_name_hash(nd->path.dentry);
+ unsigned long len = 0, c, last = 0;
c = (unsigned char)*name;
do {
+ last = (last << 8) + c;
len++;
hash = partial_name_hash(c, hash);
c = (unsigned char)name[len];
} while (c && c != '/');
- return hashlen_create(end_name_hash(hash), len);
+
+ // This is reliable for DOT or DOTDOT, since the component
+ // cannot contain NUL characters: if the top bits are zero,
+ // the component really was this short.
+ *lastword = last;
+ nd->last.hash = end_name_hash(hash);
+ nd->last.len = len;
+ return name + len;
}
#endif
+#ifndef LAST_WORD_IS_DOT
+ #define LAST_WORD_IS_DOT 0x2e
+ #define LAST_WORD_IS_DOTDOT 0x2e2e
+#endif
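A userspace illustration (not kernel code) of why these constants work: packing a short component's bytes into one word, as the byte-at-a-time fallback above does, yields exactly 0x2e for "." and 0x2e2e for "..", and since path components cannot contain NUL bytes no longer component can produce a word with only the low bytes set:

#include <stdio.h>

static unsigned long pack_component(const char *name)
{
	unsigned long last = 0;

	for (; *name && *name != '/'; name++)
		last = (last << 8) + (unsigned char)*name;
	return last;
}

int main(void)
{
	printf(".   -> %#lx (LAST_WORD_IS_DOT)\n", pack_component("."));
	printf("..  -> %#lx (LAST_WORD_IS_DOTDOT)\n", pack_component(".."));
	printf("a.b -> %#lx (neither)\n", pack_component("a.b"));
	return 0;
}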
+
/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
@@ -2266,45 +2346,38 @@ static int link_path_walk(const char *name, struct nameidata *nd)
for(;;) {
struct mnt_idmap *idmap;
const char *link;
- u64 hash_len;
- int type;
+ unsigned long lastword;
idmap = mnt_idmap(nd->path.mnt);
err = may_lookup(idmap, nd);
if (err)
return err;
- hash_len = hash_name(nd->path.dentry, name);
+ nd->last.name = name;
+ name = hash_name(nd, name, &lastword);
- type = LAST_NORM;
- if (name[0] == '.') switch (hashlen_len(hash_len)) {
- case 2:
- if (name[1] == '.') {
- type = LAST_DOTDOT;
- nd->state |= ND_JUMPED;
- }
- break;
- case 1:
- type = LAST_DOT;
- }
- if (likely(type == LAST_NORM)) {
- struct dentry *parent = nd->path.dentry;
+ switch(lastword) {
+ case LAST_WORD_IS_DOTDOT:
+ nd->last_type = LAST_DOTDOT;
+ nd->state |= ND_JUMPED;
+ break;
+
+ case LAST_WORD_IS_DOT:
+ nd->last_type = LAST_DOT;
+ break;
+
+ default:
+ nd->last_type = LAST_NORM;
nd->state &= ~ND_JUMPED;
+
+ struct dentry *parent = nd->path.dentry;
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- struct qstr this = { { .hash_len = hash_len }, .name = name };
- err = parent->d_op->d_hash(parent, &this);
+ err = parent->d_op->d_hash(parent, &nd->last);
if (err < 0)
return err;
- hash_len = this.hash_len;
- name = this.name;
}
}
- nd->last.hash_len = hash_len;
- nd->last.name = name;
- nd->last_type = type;
-
- name += hashlen_len(hash_len);
if (!*name)
goto OK;
/*
@@ -2922,16 +2995,16 @@ int path_pts(struct path *path)
}
#endif
-int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
- struct path *path, int *empty)
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+ struct path *path)
{
- struct filename *filename = getname_flags(name, flags, empty);
+ struct filename *filename = getname_flags(name, flags);
int ret = filename_lookup(dfd, filename, flags, path, NULL);
putname(filename);
return ret;
}
-EXPORT_SYMBOL(user_path_at_empty);
+EXPORT_SYMBOL(user_path_at);
int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
struct inode *inode)
@@ -3572,8 +3645,12 @@ static const char *open_last_lookups(struct nameidata *nd,
else
inode_lock_shared(dir->d_inode);
dentry = lookup_open(nd, file, op, got_write);
- if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
- fsnotify_create(dir->d_inode, dentry);
+ if (!IS_ERR(dentry)) {
+ if (file->f_mode & FMODE_CREATED)
+ fsnotify_create(dir->d_inode, dentry);
+ if (file->f_mode & FMODE_OPENED)
+ fsnotify_open(file);
+ }
if (open_flag & O_CREAT)
inode_unlock(dir->d_inode);
else
@@ -3700,6 +3777,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
error = dir->i_op->tmpfile(idmap, dir, file, mode);
dput(child);
+ if (file->f_mode & FMODE_OPENED)
+ fsnotify_open(file);
if (error)
return error;
/* Don't check for other permissions, the inode was just created */
diff --git a/fs/namespace.c b/fs/namespace.c
index 5a51315c6678..221db9de4729 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,7 +70,8 @@ static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */
-static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);
+#define MNT_UNIQUE_ID_OFFSET (1ULL << 32)
+static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init;
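
Naming the offset lets the validation added later in this patch refer to
the same boundary instead of repeating a magic number. The invariant
those checks enforce, as a one-line sketch:

    /* The first valid unique (64-bit) mount id is OFFSET + 1. */
    static inline bool valid_unique_mnt_id(u64 id)
    {
            return id > MNT_UNIQUE_ID_OFFSET;
    }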
@@ -78,6 +79,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+static DEFINE_RWLOCK(mnt_ns_tree_lock);
+static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
struct mount_kattr {
unsigned int attr_set;
@@ -103,6 +106,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
+{
+ u64 seq_b = ns->seq;
+
+ if (seq < seq_b)
+ return -1;
+ if (seq > seq_b)
+ return 1;
+ return 0;
+}
+
+static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
+{
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
+}
+
+static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
+{
+ struct mnt_namespace *ns_a = node_to_mnt_ns(a);
+ struct mnt_namespace *ns_b = node_to_mnt_ns(b);
+ u64 seq_a = ns_a->seq;
+
+ return mnt_ns_cmp(seq_a, ns_b) < 0;
+}
+
+static void mnt_ns_tree_add(struct mnt_namespace *ns)
+{
+ guard(write_lock)(&mnt_ns_tree_lock);
+ rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
+}
+
+static void mnt_ns_release(struct mnt_namespace *ns)
+{
+ lockdep_assert_not_held(&mnt_ns_tree_lock);
+
+ /* keep alive for {list,stat}mount() */
+ if (refcount_dec_and_test(&ns->passive)) {
+ put_user_ns(ns->user_ns);
+ kfree(ns);
+ }
+}
+DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
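
DEFINE_FREE() is the <linux/cleanup.h> helper: a pointer later declared
with `__free(mnt_ns_release)` has the release hook run automatically
when it goes out of scope (NULL is skipped by the `if (_T)` guard). A
minimal sketch of the pattern the statmount()/listmount() rewrites below
depend on, with `id` as a stand-in:

    struct mnt_namespace *ns __free(mnt_ns_release) = lookup_mnt_ns(id);

    if (!ns)
            return -ENOENT;         /* nothing to drop, hook sees NULL */
    /* ... use ns; mnt_ns_release(ns) runs on every return path ... */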
+
+static void mnt_ns_tree_remove(struct mnt_namespace *ns)
+{
+ /* remove from global mount namespace list */
+ if (!is_anon_ns(ns)) {
+ guard(write_lock)(&mnt_ns_tree_lock);
+ rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
+ }
+
+ mnt_ns_release(ns);
+}
+
+/*
+ * Returns the mount namespace which either has the specified id, or has the
+ * next smallest id after the specified one.
+ */
+static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
+{
+ struct rb_node *node = mnt_ns_tree.rb_node;
+ struct mnt_namespace *ret = NULL;
+
+ lockdep_assert_held(&mnt_ns_tree_lock);
+
+ while (node) {
+ struct mnt_namespace *n = node_to_mnt_ns(node);
+
+ if (mnt_ns_id <= n->seq) {
+ ret = node_to_mnt_ns(node);
+ if (mnt_ns_id == n->seq)
+ break;
+ node = node->rb_left;
+ } else {
+ node = node->rb_right;
+ }
+ }
+ return ret;
+}
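
The tree walk above is an "exact id, or else the smallest id above it"
lookup: lookup_mnt_ns() below rejects the inexact case, while a
cursor-style iterator could consume it directly. The same search over a
plain sorted array, as a self-contained sketch (ids[] stands in for the
ns->seq keys):

    /* Return the index of the smallest id >= want, or -1 if none. */
    static long find_id_at(const unsigned long long *ids, int n,
                           unsigned long long want)
    {
            int lo = 0, hi = n;
            long ans = -1;

            while (lo < hi) {
                    int mid = lo + (hi - lo) / 2;

                    if (want <= ids[mid]) {
                            ans = mid;      /* candidate */
                            if (want == ids[mid])
                                    break;  /* exact match */
                            hi = mid;
                    } else {
                            lo = mid + 1;
                    }
            }
            return ans;
    }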
+
+/*
+ * Lookup a mount namespace by id and take a passive reference count. Taking a
+ * passive reference means the mount namespace can be emptied if, e.g., the last
+ * task holding an active reference exits. To access the mounts of the
+ * namespace the @namespace_sem must first be acquired. If the namespace has
+ * already shut down before acquiring @namespace_sem, {list,stat}mount() will
+ * see that the mount rbtree of the namespace is empty.
+ */
+static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
+{
+ struct mnt_namespace *ns;
+
+ guard(read_lock)(&mnt_ns_tree_lock);
+ ns = mnt_ns_find_id_at(mnt_ns_id);
+ if (!ns || ns->seq != mnt_ns_id)
+ return NULL;
+
+ refcount_inc(&ns->passive);
+ return ns;
+}
+
static inline void lock_mount_hash(void)
{
write_seqlock(&mount_lock);
@@ -1448,6 +1554,30 @@ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id)
return ret;
}
+/*
+ * Returns the mount which either has the specified mnt_id, or has the
+ * next smaller id before the specified one.
+ */
+static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id)
+{
+ struct rb_node *node = ns->mounts.rb_node;
+ struct mount *ret = NULL;
+
+ while (node) {
+ struct mount *m = node_to_mount(node);
+
+ if (mnt_id >= m->mnt_id_unique) {
+ ret = node_to_mount(node);
+ if (mnt_id == m->mnt_id_unique)
+ break;
+ node = node->rb_right;
+ } else {
+ node = node->rb_left;
+ }
+ }
+ return ret;
+}
+
#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
@@ -1846,19 +1976,6 @@ bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
-/**
- * path_mounted - check whether path is mounted
- * @path: path to check
- *
- * Determine whether @path refers to the root of a mount.
- *
- * Return: true if @path is the root of a mount, false if not.
- */
-static inline bool path_mounted(const struct path *path)
-{
- return path->mnt->mnt_root == path->dentry;
-}
-
static void warn_mandlock(void)
{
pr_warn_once("=======================================================\n"
@@ -1966,69 +2083,72 @@ static bool mnt_ns_loop(struct dentry *dentry)
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
-struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
+struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
int flag)
{
- struct mount *res, *p, *q, *r, *parent;
+ struct mount *res, *src_parent, *src_root_child, *src_mnt,
+ *dst_parent, *dst_mnt;
- if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
+ if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(src_root))
return ERR_PTR(-EINVAL);
if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL);
- res = q = clone_mnt(mnt, dentry, flag);
- if (IS_ERR(q))
- return q;
+ res = dst_mnt = clone_mnt(src_root, dentry, flag);
+ if (IS_ERR(dst_mnt))
+ return dst_mnt;
- q->mnt_mountpoint = mnt->mnt_mountpoint;
+ src_parent = src_root;
+ dst_mnt->mnt_mountpoint = src_root->mnt_mountpoint;
- p = mnt;
- list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
- struct mount *s;
- if (!is_subdir(r->mnt_mountpoint, dentry))
+ list_for_each_entry(src_root_child, &src_root->mnt_mounts, mnt_child) {
+ if (!is_subdir(src_root_child->mnt_mountpoint, dentry))
continue;
- for (s = r; s; s = next_mnt(s, r)) {
+ for (src_mnt = src_root_child; src_mnt;
+ src_mnt = next_mnt(src_mnt, src_root_child)) {
if (!(flag & CL_COPY_UNBINDABLE) &&
- IS_MNT_UNBINDABLE(s)) {
- if (s->mnt.mnt_flags & MNT_LOCKED) {
+ IS_MNT_UNBINDABLE(src_mnt)) {
+ if (src_mnt->mnt.mnt_flags & MNT_LOCKED) {
/* Both unbindable and locked. */
- q = ERR_PTR(-EPERM);
+ dst_mnt = ERR_PTR(-EPERM);
goto out;
} else {
- s = skip_mnt_tree(s);
+ src_mnt = skip_mnt_tree(src_mnt);
continue;
}
}
if (!(flag & CL_COPY_MNT_NS_FILE) &&
- is_mnt_ns_file(s->mnt.mnt_root)) {
- s = skip_mnt_tree(s);
+ is_mnt_ns_file(src_mnt->mnt.mnt_root)) {
+ src_mnt = skip_mnt_tree(src_mnt);
continue;
}
- while (p != s->mnt_parent) {
- p = p->mnt_parent;
- q = q->mnt_parent;
+ while (src_parent != src_mnt->mnt_parent) {
+ src_parent = src_parent->mnt_parent;
+ dst_mnt = dst_mnt->mnt_parent;
}
- p = s;
- parent = q;
- q = clone_mnt(p, p->mnt.mnt_root, flag);
- if (IS_ERR(q))
+
+ src_parent = src_mnt;
+ dst_parent = dst_mnt;
+ dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag);
+ if (IS_ERR(dst_mnt))
goto out;
lock_mount_hash();
- list_add_tail(&q->mnt_list, &res->mnt_list);
- attach_mnt(q, parent, p->mnt_mp, false);
+ list_add_tail(&dst_mnt->mnt_list, &res->mnt_list);
+ attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp, false);
unlock_mount_hash();
}
}
return res;
+
out:
if (res) {
lock_mount_hash();
umount_tree(res, UMOUNT_SYNC);
unlock_mount_hash();
}
- return q;
+ return dst_mnt;
}
/* Caller should check returned pointer for errors */
@@ -2078,7 +2198,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
@@ -3709,8 +3829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns))
ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts);
- put_user_ns(ns->user_ns);
- kfree(ns);
+ mnt_ns_tree_remove(ns);
}
/*
@@ -3749,7 +3868,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1);
+ refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
+ RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
@@ -3826,6 +3947,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old);
}
+ mnt_ns_tree_add(new_ns);
namespace_unlock();
if (rootmnt)
@@ -4843,6 +4965,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
return 0;
}
+static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
+{
+ s->sm.mask |= STATMOUNT_MNT_NS_ID;
+ s->sm.mnt_ns_id = ns->seq;
+}
+
+static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
+{
+ struct vfsmount *mnt = s->mnt;
+ struct super_block *sb = mnt->mnt_sb;
+ int err;
+
+ if (sb->s_op->show_options) {
+ size_t start = seq->count;
+
+ err = sb->s_op->show_options(seq, mnt->mnt_root);
+ if (err)
+ return err;
+
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
+ if (seq->count == start)
+ return 0;
+
+ /* skip leading comma */
+ memmove(seq->buf + start, seq->buf + start + 1,
+ seq->count - start - 1);
+ seq->count--;
+ }
+
+ return 0;
+}
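
show_options() implementations emit every option preceded by a comma, so
the sequence buffer arrives as ",opt1,opt2,..."; the memmove() above
shifts the tail left one byte so userspace sees "opt1,opt2,...". A
standalone model of the strip:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char buf[] = ",ro,relatime";
            size_t start = 0, count = sizeof(buf) - 1;

            /* skip leading comma, exactly as above */
            memmove(buf + start, buf + start + 1, count - start - 1);
            buf[--count] = '\0';    /* the kernel just does seq->count-- */
            printf("%s\n", buf);    /* prints: ro,relatime */
            return 0;
    }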
+
static int statmount_string(struct kstatmount *s, u64 flag)
{
int ret;
@@ -4863,6 +5019,10 @@ static int statmount_string(struct kstatmount *s, u64 flag)
sm->mnt_point = seq->count;
ret = statmount_mnt_point(s, seq);
break;
+ case STATMOUNT_MNT_OPTS:
+ sm->mnt_opts = seq->count;
+ ret = statmount_mnt_opts(s, seq);
+ break;
default:
WARN_ON_ONCE(true);
return -EINVAL;
@@ -4903,23 +5063,84 @@ static int copy_statmount_to_user(struct kstatmount *s)
return 0;
}
-static int do_statmount(struct kstatmount *s)
+static struct mount *listmnt_next(struct mount *curr, bool reverse)
{
- struct mount *m = real_mount(s->mnt);
+ struct rb_node *node;
+
+ if (reverse)
+ node = rb_prev(&curr->mnt_node);
+ else
+ node = rb_next(&curr->mnt_node);
+
+ return node_to_mount(node);
+}
+
+static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
+{
+ struct mount *first, *child;
+
+ rwsem_assert_held(&namespace_sem);
+
+ /* We're looking at our own ns, just use get_fs_root. */
+ if (ns == current->nsproxy->mnt_ns) {
+ get_fs_root(current->fs, root);
+ return 0;
+ }
+
+ /*
+	 * We have to find the first mount in the namespace and use that;
+	 * however, it may not exist, so handle that case properly.
+ */
+ if (RB_EMPTY_ROOT(&ns->mounts))
+ return -ENOENT;
+
+ first = child = ns->root;
+ for (;;) {
+ child = listmnt_next(child, false);
+ if (!child)
+ return -ENOENT;
+ if (child->mnt_parent == first)
+ break;
+ }
+
+ root->mnt = mntget(&child->mnt);
+ root->dentry = dget(root->mnt->mnt_root);
+ return 0;
+}
+
+static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
+ struct mnt_namespace *ns)
+{
+ struct path root __free(path_put) = {};
+ struct mount *m;
int err;
+ /* Has the namespace already been emptied? */
+ if (mnt_ns_id && RB_EMPTY_ROOT(&ns->mounts))
+ return -ENOENT;
+
+ s->mnt = lookup_mnt_in_ns(mnt_id, ns);
+ if (!s->mnt)
+ return -ENOENT;
+
+ err = grab_requested_root(ns, &root);
+ if (err)
+ return err;
+
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
- !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
+ m = real_mount(s->mnt);
+ if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
err = security_sb_statfs(s->mnt->mnt_root);
if (err)
return err;
+ s->root = root;
if (s->mask & STATMOUNT_SB_BASIC)
statmount_sb_basic(s);
@@ -4938,6 +5159,12 @@ static int do_statmount(struct kstatmount *s)
if (!err && s->mask & STATMOUNT_MNT_POINT)
err = statmount_string(s, STATMOUNT_MNT_POINT);
+ if (!err && s->mask & STATMOUNT_MNT_OPTS)
+ err = statmount_string(s, STATMOUNT_MNT_OPTS);
+
+ if (!err && s->mask & STATMOUNT_MNT_NS_ID)
+ statmount_mnt_ns_id(s, ns);
+
if (err)
return err;
@@ -4955,6 +5182,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
return true;
}
+#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
+ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS)
+
static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
struct statmount __user *buf, size_t bufsize,
size_t seq_size)
@@ -4966,10 +5196,18 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
ks->mask = kreq->param;
ks->buf = buf;
ks->bufsize = bufsize;
- ks->seq.size = seq_size;
- ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
- if (!ks->seq.buf)
- return -ENOMEM;
+
+ if (ks->mask & STATMOUNT_STRING_REQ) {
+ if (bufsize == sizeof(ks->sm))
+ return -EOVERFLOW;
+
+ ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
+ if (!ks->seq.buf)
+ return -ENOMEM;
+
+ ks->seq.size = seq_size;
+ }
+
return 0;
}
@@ -4979,7 +5217,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
int ret;
size_t usize;
- BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0);
+ BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1);
ret = get_user(usize, &req->size);
if (ret)
@@ -4994,16 +5232,32 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
return ret;
if (kreq->spare != 0)
return -EINVAL;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
return 0;
}
+/*
+ * If the user requested a specific mount namespace id, look it up and
+ * return it; otherwise grab a passive reference on the current mount
+ * namespace and return that.
+ */
+static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id)
+{
+ if (mnt_ns_id)
+ return lookup_mnt_ns(mnt_ns_id);
+ refcount_inc(&current->nsproxy->mnt_ns->passive);
+ return current->nsproxy->mnt_ns;
+}
+
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
struct statmount __user *, buf, size_t, bufsize,
unsigned int, flags)
{
- struct vfsmount *mnt;
+ struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
+ struct kstatmount *ks __free(kfree) = NULL;
struct mnt_id_req kreq;
- struct kstatmount ks;
/* We currently support retrieval of 3 strings. */
size_t seq_size = 3 * PATH_MAX;
int ret;
@@ -5015,64 +5269,88 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (ret)
return ret;
+ ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
+ if (!ns)
+ return -ENOENT;
+
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -ENOENT;
+
+ ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
+ if (!ks)
+ return -ENOMEM;
+
retry:
- ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size);
+ ret = prepare_kstatmount(ks, &kreq, buf, bufsize, seq_size);
if (ret)
return ret;
- down_read(&namespace_sem);
- mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns);
- if (!mnt) {
- up_read(&namespace_sem);
- kvfree(ks.seq.buf);
- return -ENOENT;
- }
-
- ks.mnt = mnt;
- get_fs_root(current->fs, &ks.root);
- ret = do_statmount(&ks);
- path_put(&ks.root);
- up_read(&namespace_sem);
+ scoped_guard(rwsem_read, &namespace_sem)
+ ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
if (!ret)
- ret = copy_statmount_to_user(&ks);
- kvfree(ks.seq.buf);
+ ret = copy_statmount_to_user(ks);
+ kvfree(ks->seq.buf);
if (retry_statmount(ret, &seq_size))
goto retry;
return ret;
}
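
With the namespace plumbing in place, statmount() can address a mount in
another namespace by its unique id. A hedged user-space sketch: the
struct layout follows MNT_ID_REQ_SIZE_VER1 from the uapi this series
extends, the raw syscall number is the x86-64 one, and 0x1 is assumed to
be STATMOUNT_SB_BASIC per the uapi header:

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef __NR_statmount
    #define __NR_statmount 457              /* x86-64 */
    #endif

    struct mnt_id_req {
            uint32_t size;
            uint32_t spare;
            uint64_t mnt_id;        /* unique id, strictly above 1ULL << 32 */
            uint64_t param;         /* STATMOUNT_* request mask */
            uint64_t mnt_ns_id;     /* 0 == current mount namespace */
    };

    int main(int argc, char **argv)
    {
            struct mnt_id_req req = {
                    .size   = sizeof(req),
                    .param  = 0x1,  /* STATMOUNT_SB_BASIC */
            };
            size_t bufsize = 1 << 16;
            void *buf = malloc(bufsize);

            if (argc < 2 || !buf)
                    return 1;
            req.mnt_id = strtoull(argv[1], NULL, 0);
            if (syscall(__NR_statmount, &req, buf, bufsize, 0) < 0)
                    perror("statmount");
            free(buf);
            return 0;
    }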
-static struct mount *listmnt_next(struct mount *curr)
+static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
+ u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
+ bool reverse)
{
- return node_to_mount(rb_next(&curr->mnt_node));
-}
-
-static ssize_t do_listmount(struct mount *first, struct path *orig,
- u64 mnt_parent_id, u64 __user *mnt_ids,
- size_t nr_mnt_ids, const struct path *root)
-{
- struct mount *r;
+ struct path root __free(path_put) = {};
+ struct path orig;
+ struct mount *r, *first;
ssize_t ret;
+ rwsem_assert_held(&namespace_sem);
+
+ ret = grab_requested_root(ns, &root);
+ if (ret)
+ return ret;
+
+ if (mnt_parent_id == LSMT_ROOT) {
+ orig = root;
+ } else {
+ orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
+ if (!orig.mnt)
+ return -ENOENT;
+ orig.dentry = orig.mnt->mnt_root;
+ }
+
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) &&
- !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
+ if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
- ret = security_sb_statfs(orig->dentry);
+ ret = security_sb_statfs(orig.dentry);
if (ret)
return ret;
- for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) {
+ if (!last_mnt_id) {
+ if (reverse)
+ first = node_to_mount(rb_last(&ns->mounts));
+ else
+ first = node_to_mount(rb_first(&ns->mounts));
+ } else {
+ if (reverse)
+ first = mnt_find_id_at_reverse(ns, last_mnt_id - 1);
+ else
+ first = mnt_find_id_at(ns, last_mnt_id + 1);
+ }
+
+ for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r, reverse)) {
if (r->mnt_id_unique == mnt_parent_id)
continue;
- if (!is_path_reachable(r, r->mnt.mnt_root, orig))
+ if (!is_path_reachable(r, r->mnt.mnt_root, &orig))
continue;
- if (put_user(r->mnt_id_unique, mnt_ids))
- return -EFAULT;
+ *mnt_ids = r->mnt_id_unique;
mnt_ids++;
nr_mnt_ids--;
ret++;
@@ -5080,22 +5358,26 @@ static ssize_t do_listmount(struct mount *first, struct path *orig,
return ret;
}
-SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
- mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
+SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
+ u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{
- struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ u64 *kmnt_ids __free(kvfree) = NULL;
+ const size_t maxcount = 1000000;
+ struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq;
- struct mount *first;
- struct path root, orig;
- u64 mnt_parent_id, last_mnt_id;
- const size_t maxcount = (size_t)-1 >> 3;
+ u64 last_mnt_id;
ssize_t ret;
- if (flags)
+ if (flags & ~LISTMOUNT_REVERSE)
return -EINVAL;
+ /*
+	 * If the mount namespace really has more than 1 million mounts, the
+ * caller must iterate over the mount namespace (and reconsider their
+ * system design...).
+ */
if (unlikely(nr_mnt_ids > maxcount))
- return -EFAULT;
+ return -EOVERFLOW;
if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
return -EFAULT;
@@ -5103,33 +5385,37 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
ret = copy_mnt_id_req(req, &kreq);
if (ret)
return ret;
- mnt_parent_id = kreq.mnt_id;
+
last_mnt_id = kreq.param;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
- down_read(&namespace_sem);
- get_fs_root(current->fs, &root);
- if (mnt_parent_id == LSMT_ROOT) {
- orig = root;
- } else {
- ret = -ENOENT;
- orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
- if (!orig.mnt)
- goto err;
- orig.dentry = orig.mnt->mnt_root;
- }
- if (!last_mnt_id)
- first = node_to_mount(rb_first(&ns->mounts));
- else
- first = mnt_find_id_at(ns, last_mnt_id + 1);
+ kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids),
+ GFP_KERNEL_ACCOUNT);
+ if (!kmnt_ids)
+ return -ENOMEM;
+
+ ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
+ if (!ns)
+ return -ENOENT;
+
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -ENOENT;
+
+ scoped_guard(rwsem_read, &namespace_sem)
+ ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
+ nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
+ if (ret <= 0)
+ return ret;
+
+ if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
+ return -EFAULT;
- ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root);
-err:
- path_put(&root);
- up_read(&namespace_sem);
return ret;
}
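
Because do_listmount() now resumes from `last_mnt_id + 1` (or `- 1`
under LISTMOUNT_REVERSE), userspace pages through a namespace with a
cursor instead of one huge buffer. A sketch continuing the statmount
example above (same includes and struct mnt_id_req; the syscall number
and LSMT_ROOT value are assumptions taken from the uapi):

    #ifndef __NR_listmount
    #define __NR_listmount 458                      /* x86-64 */
    #endif
    #define LSMT_ROOT 0xffffffffffffffffULL         /* whole namespace */

    static void list_all_mounts(void)
    {
            uint64_t ids[256];
            struct mnt_id_req req = {
                    .size   = sizeof(req),
                    .mnt_id = LSMT_ROOT,
            };

            for (;;) {
                    ssize_t n = syscall(__NR_listmount, &req, ids, 256, 0);

                    if (n <= 0)
                            break;
                    for (ssize_t i = 0; i < n; i++)
                            printf("%llu\n", (unsigned long long)ids[i]);
                    if (n < 256)
                            break;
                    req.param = ids[n - 1]; /* cursor: resume after this id */
            }
    }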
-
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
@@ -5157,6 +5443,8 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
+
+ mnt_ns_tree_add(ns);
}
void __init mnt_init(void)
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a6bb03bea920..4c0401dbbfcf 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- _debug("no unlock");
+ kdebug("no unlock");
else
folio_unlock(folio);
}
@@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl)
struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
- _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
+ kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
if (readahead_count(ractl) == 0)
return;
@@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct folio *sink = NULL;
int ret;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
folio_file_pos(folio), folio_size(folio),
@@ -508,7 +508,7 @@ retry:
have_folio:
*_folio = folio;
- _leave(" = 0");
+ kleave(" = 0");
return 0;
error_put:
@@ -518,7 +518,7 @@ error:
folio_unlock(folio);
folio_put(folio);
}
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
@@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t flen = folio_size(folio);
int ret;
- _enter("%zx @%llx", flen, start);
+ kenter("%zx @%llx", flen, start);
ret = -ENOMEM;
@@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 07bc1fd43530..ecbc99ec7d36 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_file_pos(folio);
- _enter("");
+ kenter("");
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
@@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
*/
howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
flen, offset, part, maybe_trouble);
- _debug("howto %u", howto);
+ kdebug("howto %u", howto);
switch (howto) {
case NETFS_JUST_PREFETCH:
ret = netfs_prefetch_for_write(file, folio, offset, part);
if (ret < 0) {
- _debug("prefetch = %zd", ret);
+ kdebug("prefetch = %zd", ret);
goto error_folio_unlock;
}
break;
@@ -418,7 +418,7 @@ out:
}
iocb->ki_pos += written;
- _leave(" = %zd [%zd]", written, ret);
+ kleave(" = %zd [%zd]", written, ret);
return written ? written : ret;
error_folio_unlock:
@@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct netfs_inode *ictx = netfs_inode(inode);
ssize_t ret;
- _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
@@ -523,17 +523,23 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
struct netfs_group *group;
struct folio *folio = page_folio(vmf->page);
struct file *file = vmf->vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
struct inode *inode = file_inode(file);
struct netfs_inode *ictx = netfs_inode(inode);
vm_fault_t ret = VM_FAULT_RETRY;
int err;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
sb_start_pagefault(inode->i_sb);
if (folio_lock_killable(folio) < 0)
goto out;
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
if (folio_wait_writeback_killable(folio)) {
ret = VM_FAULT_LOCKED;
@@ -549,9 +555,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
group = netfs_folio_group(folio);
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
folio_unlock(folio);
- err = filemap_fdatawait_range(inode->i_mapping,
- folio_pos(folio),
- folio_pos(folio) + folio_size(folio));
+ err = filemap_fdatawrite_range(mapping,
+ folio_pos(folio),
+ folio_pos(folio) + folio_size(folio));
switch (err) {
case 0:
ret = VM_FAULT_RETRY;
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 10a1e4da6bda..b6debac6205f 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
if (!orig_count)
return 0; /* Don't update atime */
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index e14cd53ac9fd..792ef17bae21 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
/* We're going to need a bounce buffer if what we transmit is going to
* be different in some way to the source buffer, e.g. because it gets
@@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
// TODO
- _debug("uw %llx-%llx", start, end);
+ kdebug("uw %llx-%llx", start, end);
wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
iocb->ki_flags & IOCB_DIRECT ?
@@ -92,10 +92,11 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
if (async)
wreq->iocb = iocb;
+ wreq->len = iov_iter_count(&wreq->io_iter);
wreq->cleanup = netfs_cleanup_dio_write;
- ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter));
+ ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
- _debug("begin = %zd", ret);
+ kdebug("begin = %zd", ret);
goto out;
}
@@ -142,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t pos = iocb->ki_pos;
unsigned long long end = pos + iov_iter_count(from) - 1;
- _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 9397ed39b0b4..288a73c3072d 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
int n_accesses;
- _enter("{%s,%s}", ops->name, cache->name);
+ kenter("{%s,%s}", ops->name, cache->name);
BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING);
@@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache,
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name);
- _leave(" = 0 [%s]", cache->name);
+ kleave(" = 0 [%s]", cache->name);
return 0;
}
EXPORT_SYMBOL(fscache_add_cache);
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..4d1e8bf4c615 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
{
struct fscache_cookie *cookie;
- _enter("V=%x", volume->debug_id);
+ kenter("V=%x", volume->debug_id);
if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
return NULL;
@@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
trace_fscache_acquire(cookie);
fscache_stat(&fscache_n_acquires_ok);
- _leave(" = c=%08x", cookie->debug_id);
+ kleave(" = c=%08x", cookie->debug_id);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed;
bool need_withdraw = false;
- _enter("");
+ kenter("");
if (!cookie->volume->cache_priv) {
fscache_create_volume(cookie->volume, true);
@@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
if (cookie->state != FSCACHE_COOKIE_STATE_FAILED)
fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT);
need_withdraw = true;
- _leave(" [fail]");
+ kleave(" [fail]");
goto out;
}
@@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
bool queue = false;
int n_active;
- _enter("c=%08x", cookie->debug_id);
+ kenter("c=%08x", cookie->debug_id);
if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
"Trying to use relinquished cookie\n"))
@@ -636,7 +636,7 @@ again:
spin_unlock(&cookie->lock);
if (queue)
fscache_queue_cookie(cookie, fscache_cookie_get_use_work);
- _leave("");
+ kleave("");
}
EXPORT_SYMBOL(__fscache_use_cookie);
@@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
enum fscache_cookie_state state;
bool wake = false;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
again:
spin_lock(&cookie->lock);
@@ -820,7 +820,7 @@ out:
spin_unlock(&cookie->lock);
if (wake)
wake_up_cookie_state(cookie);
- _leave("");
+ kleave("");
}
static void fscache_cookie_worker(struct work_struct *work)
@@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie)
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags);
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_cookies_lru_expired);
- _debug("lru c=%x", cookie->debug_id);
+ kdebug("lru c=%x", cookie->debug_id);
__fscache_withdraw_cookie(cookie);
}
@@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
- _enter("c=%08x{%d},%d",
+ kenter("c=%08x{%d},%d",
cookie->debug_id, atomic_read(&cookie->n_active), retire);
if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
@@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
{
bool is_caching;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
fscache_stat(&fscache_n_invalidates);
@@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */
default:
spin_unlock(&cookie->lock);
- _leave(" [no %u]", cookie->state);
+ kleave(" [no %u]", cookie->state);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
- _leave(" [look %x]", cookie->inval_counter);
+ kleave(" [look %x]", cookie->inval_counter);
return;
case FSCACHE_COOKIE_STATE_ACTIVE:
@@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
if (is_caching)
fscache_queue_cookie(cookie, fscache_cookie_get_inval_work);
- _leave(" [inv]");
+ kleave(" [inv]");
return;
}
}
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..bf4eaeec44fb 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
- _leave(" [broken]");
+ kleave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
@@ -52,7 +52,7 @@ again:
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
- _leave(" [not live]");
+ kleave(" [not live]");
return false;
}
@@ -92,7 +92,7 @@ again:
spin_lock(&cookie->lock);
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -140,7 +140,7 @@ failed:
cres->cache_priv = NULL;
cres->ops = NULL;
fscache_end_cookie_access(cookie, fscache_access_io_not_live);
- _leave(" = -ENOBUFS");
+ kleave(" = -ENOBUFS");
return -ENOBUFS;
}
@@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
if (len == 0)
goto abandon;
- _enter("%llx,%zx", start, len);
+ kenter("%llx,%zx", start, len);
wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS);
if (!wreq)
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index 42e98bb523e3..bf9b33d26e31 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -99,7 +99,7 @@ error_wq:
*/
void __exit fscache_exit(void)
{
- _enter("");
+ kenter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cdf991bdd9de..2e2a405ca9b0 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
return volume;
}
+struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where)
+{
+ int ref;
+
+ if (!__refcount_inc_not_zero(&volume->ref, &ref))
+ return NULL;
+
+ trace_fscache_volume(volume->debug_id, ref + 1, where);
+ return volume;
+}
+EXPORT_SYMBOL(fscache_try_get_volume);
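
fscache_try_get_volume() is the usual conditional get: the reference is
taken only while the count is still non-zero, so a volume that has begun
tearing down cannot be revived. A sketch of the intended caller pattern
(the trace tag is whatever event the call site wants logged):

    static void peek_volume(struct fscache_volume *candidate,
                            enum fscache_volume_trace where)
    {
            struct fscache_volume *v;

            v = fscache_try_get_volume(candidate, where);
            if (!v)
                    return;         /* already on its way to being freed */
            /* ... safe to inspect v here ... */
            fscache_put_volume(v, where);
    }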
+
static void fscache_see_volume(struct fscache_volume *volume,
enum fscache_volume_trace where)
{
@@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
fscache_see_volume(volume, fscache_volume_new_acquire);
fscache_stat(&fscache_n_volumes);
up_write(&fscache_addremove_sem);
- _leave(" = v=%x", volume->debug_id);
+ kleave(" = v=%x", volume->debug_id);
return volume;
err_vol:
@@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume,
fscache_free_volume(volume);
}
}
+EXPORT_SYMBOL(fscache_put_volume);
/*
* Relinquish a volume representation cookie.
@@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume)
{
int n_accesses;
- _debug("withdraw V=%x", volume->debug_id);
+ kdebug("withdraw V=%x", volume->debug_id);
/* Allow wakeups on dec-to-0 */
n_accesses = atomic_dec_return(&volume->n_accesses);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 95e281a8af78..21e46bc9aa49 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/*
* main.c
*/
-extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
extern mempool_t netfs_request_pool;
@@ -63,15 +62,6 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
/*
* misc.c
*/
-#define NETFS_FLAG_PUT_MARK BIT(0)
-#define NETFS_FLAG_PAGECACHE_MARK BIT(1)
-int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
- struct folio *folio, unsigned int flags,
- gfp_t gfp_mask);
-int netfs_add_folios_to_buffer(struct xarray *buffer,
- struct address_space *mapping,
- pgoff_t index, pgoff_t to, gfp_t gfp_mask);
-void netfs_clear_buffer(struct xarray *buffer);
/*
* objects.c
@@ -353,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops;
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
- enum fscache_volume_trace where);
bool fscache_begin_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
@@ -365,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait);
* debug tracing
*/
#define dbgprintk(FMT, ...) \
- printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+ pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_NETFS_DEBUG)
-#define _enter(FMT, ...) \
-do { \
- if (netfs_debug) \
- kenter(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _leave(FMT, ...) \
-do { \
- if (netfs_debug) \
- kleave(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _debug(FMT, ...) \
-do { \
- if (netfs_debug) \
- kdebug(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
/*
* assertions
*/
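
With the wrappers converted to pr_debug(), the dedicated `netfs_debug`
mask (its extern is dropped above, and the module parameter goes away in
the fs/netfs/main.c hunk below) is no longer needed: on a
CONFIG_DYNAMIC_DEBUG kernel the messages can be enabled per-file at
runtime, e.g. `echo 'file fs/netfs/* +p' >
/sys/kernel/debug/dynamic_debug/control`, and on kernels without it they
compile out as before.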
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..c7576481c321 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
- _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
@@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- _enter("R=%x[%x]{%llx,%lx},%zd",
+ kenter("R=%x[%x]{%llx,%lx},%zd",
rreq->debug_id, subreq->debug_index,
subreq->start, subreq->flags, transferred_or_error);
@@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
struct netfs_inode *ictx = netfs_inode(rreq->inode);
size_t lsize;
- _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
+ kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
if (rreq->origin != NETFS_DIO_READ) {
source = netfs_cache_prepare_read(subreq, rreq->i_size);
@@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
- _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
+ kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
@@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
struct iov_iter io_iter;
int ret;
- _enter("R=%x %llx-%llx",
+ kenter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
@@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
- _debug("submit %llx + %llx >= %llx",
+ kdebug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d21..db824c372842 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -20,10 +20,6 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
-unsigned netfs_debug;
-module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
-
static struct kmem_cache *netfs_request_slab;
static struct kmem_cache *netfs_subrequest_slab;
mempool_t netfs_request_pool;
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index bc1fc54fb724..172808e83ca8 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -8,87 +8,6 @@
#include <linux/swap.h>
#include "internal.h"
-/*
- * Attach a folio to the buffer and maybe set marks on it to say that we need
- * to put the folio later and twiddle the pagecache flags.
- */
-int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
- struct folio *folio, unsigned int flags,
- gfp_t gfp_mask)
-{
- XA_STATE_ORDER(xas, xa, index, folio_order(folio));
-
-retry:
- xas_lock(&xas);
- for (;;) {
- xas_store(&xas, folio);
- if (!xas_error(&xas))
- break;
- xas_unlock(&xas);
- if (!xas_nomem(&xas, gfp_mask))
- return xas_error(&xas);
- goto retry;
- }
-
- if (flags & NETFS_FLAG_PUT_MARK)
- xas_set_mark(&xas, NETFS_BUF_PUT_MARK);
- if (flags & NETFS_FLAG_PAGECACHE_MARK)
- xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK);
- xas_unlock(&xas);
- return xas_error(&xas);
-}
-
-/*
- * Create the specified range of folios in the buffer attached to the read
- * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that
- * these need freeing later.
- */
-int netfs_add_folios_to_buffer(struct xarray *buffer,
- struct address_space *mapping,
- pgoff_t index, pgoff_t to, gfp_t gfp_mask)
-{
- struct folio *folio;
- int ret;
-
- if (to + 1 == index) /* Page range is inclusive */
- return 0;
-
- do {
- /* TODO: Figure out what order folio can be allocated here */
- folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0);
- if (!folio)
- return -ENOMEM;
- folio->index = index;
- ret = netfs_xa_store_and_mark(buffer, index, folio,
- NETFS_FLAG_PUT_MARK, gfp_mask);
- if (ret < 0) {
- folio_put(folio);
- return ret;
- }
-
- index += folio_nr_pages(folio);
- } while (index <= to && index != 0);
-
- return 0;
-}
-
-/*
- * Clear an xarray buffer, putting a ref on the folios that have
- * NETFS_BUF_PUT_MARK set.
- */
-void netfs_clear_buffer(struct xarray *buffer)
-{
- struct folio *folio;
- XA_STATE(xas, buffer, 0);
-
- rcu_read_lock();
- xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) {
- folio_put(folio);
- }
- rcu_read_unlock();
- xa_destroy(buffer);
-}
-
/**
* netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback
* @mapping: The mapping the folio belongs to.
@@ -107,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
struct fscache_cookie *cookie = netfs_i_cookie(ictx);
bool need_use = false;
- _enter("");
+ kenter("");
if (!filemap_dirty_folio(mapping, folio))
return false;
@@ -180,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo;
size_t flen = folio_size(folio);
- _enter("{%lx},%zx,%zx", folio->index, offset, length);
+ kenter("{%lx},%zx,%zx", folio->index, offset, length);
if (!folio_test_private(folio))
return;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 426cf87aaf2e..488147439fe0 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
{
struct list_head *next;
- _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+ kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
@@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
unsigned int notes;
int s;
- _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+ kenter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
@@ -409,7 +409,7 @@ reassess_streams:
front = stream->front;
while (front) {
trace_netfs_collect_sreq(wreq, front);
- //_debug("sreq [%x] %llx %zx/%zx",
+ //kdebug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
/* Stall if there may be a discontinuity. */
@@ -598,7 +598,7 @@ reassess_streams:
out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
- _leave(" = %x", notes);
+ kleave(" = %x", notes);
return;
need_retry:
@@ -606,7 +606,7 @@ need_retry:
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
- _debug("retry");
+ kdebug("retry");
netfs_retry_writes(wreq);
goto out;
}
@@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work)
size_t transferred;
int s;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
netfs_see_request(wreq, netfs_rreq_trace_see_work);
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
@@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work)
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
- _debug("finished");
+ kdebug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
struct netfs_io_request *wreq = subreq->rreq;
struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
- _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+ kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 3aa86e268f40..d7c971df8866 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
if (IS_ERR(wreq))
return wreq;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
@@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
subreq->max_nr_segs = INT_MAX;
subreq->stream_nr = stream->stream_nr;
- _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
+ kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
@@ -215,7 +215,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
{
struct netfs_io_request *wreq = subreq->rreq;
- _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
+ kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
return netfs_write_subrequest_terminated(subreq, subreq->error, false);
@@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq,
size_t part;
if (!stream->avail) {
- _leave("no write");
+ kleave("no write");
return len;
}
- _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
+ kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
if (subreq && start != subreq->start + subreq->len) {
netfs_issue_write(wreq, stream);
@@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq,
subreq = stream->construct;
part = min(subreq->max_len - subreq->len, len);
- _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
+ kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
subreq->len += part;
subreq->nr_segs++;
@@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
bool to_eof = false, streamw = false;
bool debug = false;
- _enter("");
+ kenter("");
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
@@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (fpos >= i_size) {
/* mmap beyond eof. */
- _debug("beyond eof");
+ kdebug("beyond eof");
folio_start_writeback(folio);
folio_unlock(folio);
wreq->nr_group_rel += netfs_folio_written_back(folio);
@@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
flen -= foff;
- _debug("folio %zx %zx %zx", foff, flen, fsize);
+ kdebug("folio %zx %zx %zx", foff, flen, fsize);
/* Deal with discontinuities in the stream of dirty pages. These can
* arise from a number of sources:
@@ -483,11 +483,11 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (!debug)
kdebug("R=%x: No submit", wreq->debug_id);
- if (flen < fsize)
+ if (foff + flen < fsize)
for (int s = 0; s < NR_IO_STREAMS; s++)
netfs_issue_write(wreq, &wreq->io_streams[s]);
- _leave(" = 0");
+ kleave(" = 0");
return 0;
}
@@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping,
netfs_stat(&netfs_n_wh_writepages);
do {
- _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
+ kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
/* It appears we don't have to handle cyclic writeback wrapping. */
WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
@@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping,
mutex_unlock(&ictx->wb_lock);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
couldnt_start:
netfs_kill_dirty_pages(mapping, wbc, folio);
out:
mutex_unlock(&ictx->wb_lock);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
EXPORT_SYMBOL(netfs_writepages);
@@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
- _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
+ kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
@@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
struct netfs_inode *ictx = netfs_inode(wreq->inode);
int ret;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
if (writethrough_cache)
netfs_write_folio(wreq, wbc, writethrough_cache);
@@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
loff_t start = wreq->start;
int error = 0;
- _enter("%zx", len);
+ kenter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
@@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
while (len) {
// TODO: Prepare content encryption
- _debug("unbuffered %zx", len);
+ kdebug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
@@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
if (list_empty(&upload->subrequests))
netfs_wake_write_collector(wreq, false);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bb2f583eb28b..90079ca134dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -141,8 +141,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = nfs_file_direct_read(iocb, iter, true);
else
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a142287d86f6..cca80b5f54e0 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -122,8 +122,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct folio *folio = nfs_page_to_folio(req);
- if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
- folio_set_error(folio);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
if (nfs_netfs_folio_unlock(folio))
folio_unlock(folio);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 13818129d268..1c62a5a9f51d 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -32,15 +32,7 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio)
int error;
error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE);
- if (error < 0)
- goto error;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-error:
- folio_set_error(folio);
- folio_unlock(folio);
+ folio_end_read(folio, error == 0);
return error;
}
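
folio_end_read() collapses the success path the removed lines spelled
out; roughly (a sketch of the semantics, not the mm implementation):

    if (success)
            folio_mark_uptodate(folio);
    folio_unlock(folio);

except performed as a single atomic update of the folio flag word. The
failure path no longer sets a folio error flag at all (consistent with
the folio_set_error() removals elsewhere in this series); the error is
reported through the return value alone.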
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2329cbb0e446..a91463ab87a0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -311,7 +311,6 @@ static void nfs_mapping_set_error(struct folio *folio, int error)
{
struct address_space *mapping = folio_file_mapping(folio);
- folio_set_error(folio);
filemap_set_wb_err(mapping, error);
if (mapping->host)
errseq_set(&mapping->host->i_sb->s_wb_err,
diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c
index 62d2586d9902..529a75ecf22e 100644
--- a/fs/nfsd/netlink.c
+++ b/fs/nfsd/netlink.c
@@ -44,9 +44,7 @@ static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_AD
static const struct genl_split_ops nfsd_nl_ops[] = {
{
.cmd = NFSD_CMD_RPC_STATUS_GET,
- .start = nfsd_nl_rpc_status_get_start,
.dumpit = nfsd_nl_rpc_status_get_dumpit,
- .done = nfsd_nl_rpc_status_get_done,
.flags = GENL_CMD_CAP_DUMP,
},
{
diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h
index e3724637d64d..2e132ef328f8 100644
--- a/fs/nfsd/netlink.h
+++ b/fs/nfsd/netlink.h
@@ -15,9 +15,6 @@
extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1];
extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1];
-int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb);
-int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb);
-
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 202140df8f82..c848ebe5d08f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1460,28 +1460,6 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
-/**
- * nfsd_nl_rpc_status_get_start - Prepare rpc_status_get dumpit
- * @cb: netlink metadata and command arguments
- *
- * Return values:
- * %0: The rpc_status_get command may proceed
- * %-ENODEV: There is no NFSD running in this namespace
- */
-int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb)
-{
- struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id);
- int ret = -ENODEV;
-
- mutex_lock(&nfsd_mutex);
- if (nn->nfsd_serv)
- ret = 0;
- else
- mutex_unlock(&nfsd_mutex);
-
- return ret;
-}
-
static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
struct netlink_callback *cb,
struct nfsd_genl_rqstp *rqstp)
@@ -1558,8 +1536,16 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id);
int i, ret, rqstp_index = 0;
+ struct nfsd_net *nn;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(sock_net(skb->sk), nfsd_net_id);
+ if (!nn->nfsd_serv) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
rcu_read_lock();
@@ -1636,22 +1622,10 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
ret = skb->len;
out:
rcu_read_unlock();
-
- return ret;
-}
-
-/**
- * nfsd_nl_rpc_status_get_done - rpc_status_get dumpit post-processing
- * @cb: netlink metadata and command arguments
- *
- * Return values:
- * %0: Success
- */
-int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb)
-{
+out_unlock:
mutex_unlock(&nfsd_mutex);
- return 0;
+ return ret;
}
/**
@@ -2195,6 +2169,8 @@ static __net_init int nfsd_net_init(struct net *net)
nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ nn->nfsd_info.mutex = &nfsd_mutex;
+ nn->nfsd_serv = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0b75305fb5f5..dd4e11a703aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -247,7 +247,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
dentry = dget(exp->ex_path.dentry);
else {
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
- data_left, fileid_type,
+ data_left, fileid_type, 0,
nfsd_acceptable, exp);
if (IS_ERR_OR_NULL(dentry)) {
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cd9a6a1a9fc8..89d7918de7b1 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -672,7 +672,6 @@ int nfsd_create_serv(struct net *net)
return error;
}
spin_lock(&nfsd_notifier_lock);
- nn->nfsd_info.mutex = &nfsd_mutex;
nn->nfsd_serv = serv;
spin_unlock(&nfsd_notifier_lock);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 89caef7513db..ba50388ee4bf 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -377,11 +377,12 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
* @target: offset number of an entry in the group (start point)
* @bsize: size in bits
* @lock: spin lock protecting @bitmap
+ * @wrap: whether to wrap around
*/
static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
unsigned long target,
unsigned int bsize,
- spinlock_t *lock)
+ spinlock_t *lock, bool wrap)
{
int pos, end = bsize;
@@ -397,6 +398,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
end = target;
}
+ if (!wrap)
+ return -ENOSPC;
/* wrap around */
for (pos = 0; pos < end; pos++) {
@@ -495,9 +498,10 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
* nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the allocation
+ * @wrap: whether to wrap around
*/
int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
- struct nilfs_palloc_req *req)
+ struct nilfs_palloc_req *req, bool wrap)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
@@ -516,7 +520,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
entries_per_group = nilfs_palloc_entries_per_group(inode);
for (i = 0; i < ngroups; i += n) {
- if (group >= ngroups) {
+ if (group >= ngroups && wrap) {
/* wrap around */
group = 0;
maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
@@ -550,7 +554,14 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
bitmap_kaddr = kmap_local_page(bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
pos = nilfs_palloc_find_available_slot(
- bitmap, group_offset, entries_per_group, lock);
+ bitmap, group_offset, entries_per_group, lock,
+ wrap);
+ /*
+ * Since the search for a free slot in the second and
+ * subsequent bitmap blocks always starts from the
+ * beginning, the wrap flag only has an effect on the
+ * first search.
+ */
kunmap_local(bitmap_kaddr);
if (pos >= 0)
goto found;
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index b667e869ac07..d825a9faca6d 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -50,8 +50,8 @@ struct nilfs_palloc_req {
struct buffer_head *pr_entry_bh;
};
-int nilfs_palloc_prepare_alloc_entry(struct inode *,
- struct nilfs_palloc_req *);
+int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
+ struct nilfs_palloc_req *req, bool wrap);
void nilfs_palloc_commit_alloc_entry(struct inode *,
struct nilfs_palloc_req *);
void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 180fc8d36213..fc1caf63a42a 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
int ret;
- ret = nilfs_palloc_prepare_alloc_entry(dat, req);
+ ret = nilfs_palloc_prepare_alloc_entry(dat, req, true);
if (ret < 0)
return ret;
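The wrap parameter threaded through the allocator above decides whether the free-slot search may wrap past the end of the range back to the beginning. A self-contained sketch of that logic, with illustrative names rather than the nilfs2 functions:

	#include <stdbool.h>

	/*
	 * Find the first zero bit in map[0..size), starting at start and
	 * optionally wrapping around; returns -1 if no slot is free.
	 */
	static int find_free_slot(const unsigned char *map, int size,
				  int start, bool wrap)
	{
		int pos;

		for (pos = start; pos < size; pos++)
			if (!(map[pos / 8] & (1 << (pos % 8))))
				return pos;
		if (!wrap)
			return -1;
		for (pos = 0; pos < start; pos++)
			if (!(map[pos / 8] & (1 << (pos % 8))))
				return pos;
		return -1;
	}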
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 52e50b1b7f22..4a29b0138d75 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -135,6 +135,9 @@ static bool nilfs_check_folio(struct folio *folio, char *kaddr)
goto Enamelen;
if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
goto Espan;
+ if (unlikely(p->inode &&
+ NILFS_PRIVATE_INODE(le64_to_cpu(p->inode))))
+ goto Einumber;
}
if (offs != limit)
goto Eend;
@@ -160,6 +163,9 @@ Enamelen:
goto bad_entry;
Espan:
error = "directory entry across blocks";
+ goto bad_entry;
+Einumber:
+ error = "disallowed inode number";
bad_entry:
nilfs_error(sb,
"bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d",
@@ -377,11 +383,39 @@ found:
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
+ struct folio *folio;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+ de = nilfs_get_folio(dir, 0, &folio);
if (IS_ERR(de))
return NULL;
- return nilfs_next_entry(de);
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
+
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
+ }
+ *foliop = folio;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ folio_release_kmap(folio, de);
+ return NULL;
}
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 612e609158b5..1e86b9303b7c 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -56,13 +56,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
struct nilfs_palloc_req req;
int ret;
- req.pr_entry_nr = 0; /*
- * 0 says find free inode from beginning
- * of a group. dull code!!
- */
+ req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb);
req.pr_entry_bh = NULL;
- ret = nilfs_palloc_prepare_alloc_entry(ifile, &req);
+ ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false);
if (!ret) {
ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
&req.pr_entry_bh);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 728e90be3570..4017f7856440 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -116,9 +116,15 @@ enum {
#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
#define NILFS_MDT_INODE(sb, ino) \
- ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino)))
+ ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino)))
#define NILFS_VALID_INODE(sb, ino) \
- ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino)))
+ ((ino) >= NILFS_FIRST_INO(sb) || \
+ ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino))))
+
+#define NILFS_PRIVATE_INODE(ino) ({ \
+ ino_t __ino = (ino); \
+ ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \
+ (__ino) != NILFS_SKETCH_INO); })
/**
* struct nilfs_transaction_info: context information for synchronization
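As a reading aid for the macros above: with the reserved-inode constants from the nilfs2 headers (NILFS_ROOT_INO == 2, NILFS_SKETCH_INO == 10, NILFS_USER_INO == 11 at the time of writing; treat the literal values as assumptions), NILFS_PRIVATE_INODE reduces to:

	/* Illustrative restatement, not the kernel macro itself. */
	static inline bool nilfs_ino_is_private(unsigned long ino)
	{
		return ino < 11 && ino != 2 && ino != 10;
	}

Directory entries naming such inodes are exactly what the new Einumber check in dir.c rejects.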
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index f41d7b6d432c..e44dde57ab65 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -452,6 +452,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
}
nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
+ if (nilfs->ns_first_ino < NILFS_USER_INO) {
+ nilfs_err(nilfs->ns_sb,
+ "too small lower limit for non-reserved inode numbers: %u",
+ nilfs->ns_first_ino);
+ return -EINVAL;
+ }
nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 85da0629415d..1e829ed7b0ef 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -182,7 +182,7 @@ struct the_nilfs {
unsigned long ns_nrsvsegs;
unsigned long ns_first_data_block;
int ns_inode_size;
- int ns_first_ino;
+ unsigned int ns_first_ino;
u32 ns_crc_seed;
/* /sys/fs/<nilfs>/<device> */
diff --git a/fs/nls/mac-celtic.c b/fs/nls/mac-celtic.c
index 266c2d7d50bd..2963f3299d7e 100644
--- a/fs/nls/mac-celtic.c
+++ b/fs/nls/mac-celtic.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_macceltic(void)
module_init(init_nls_macceltic)
module_exit(exit_nls_macceltic)
+MODULE_DESCRIPTION("NLS Codepage macceltic");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-centeuro.c b/fs/nls/mac-centeuro.c
index 9789c6057551..43b20f4bdb67 100644
--- a/fs/nls/mac-centeuro.c
+++ b/fs/nls/mac-centeuro.c
@@ -528,4 +528,5 @@ static void __exit exit_nls_maccenteuro(void)
module_init(init_nls_maccenteuro)
module_exit(exit_nls_maccenteuro)
+MODULE_DESCRIPTION("NLS Codepage maccenteuro");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-croatian.c b/fs/nls/mac-croatian.c
index bb19e7a07d43..62730d6a64e5 100644
--- a/fs/nls/mac-croatian.c
+++ b/fs/nls/mac-croatian.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_maccroatian(void)
module_init(init_nls_maccroatian)
module_exit(exit_nls_maccroatian)
+MODULE_DESCRIPTION("NLS Codepage maccroatian");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-cyrillic.c b/fs/nls/mac-cyrillic.c
index 2a7dea36acba..7a5c4d16aac8 100644
--- a/fs/nls/mac-cyrillic.c
+++ b/fs/nls/mac-cyrillic.c
@@ -493,4 +493,5 @@ static void __exit exit_nls_maccyrillic(void)
module_init(init_nls_maccyrillic)
module_exit(exit_nls_maccyrillic)
+MODULE_DESCRIPTION("NLS Codepage maccyrillic");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-gaelic.c b/fs/nls/mac-gaelic.c
index 77b001653588..3d22f03a90b6 100644
--- a/fs/nls/mac-gaelic.c
+++ b/fs/nls/mac-gaelic.c
@@ -563,4 +563,5 @@ static void __exit exit_nls_macgaelic(void)
module_init(init_nls_macgaelic)
module_exit(exit_nls_macgaelic)
+MODULE_DESCRIPTION("NLS Codepage macgaelic");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-greek.c b/fs/nls/mac-greek.c
index 1eccf499e2eb..de3aa9ddb5b1 100644
--- a/fs/nls/mac-greek.c
+++ b/fs/nls/mac-greek.c
@@ -493,4 +493,5 @@ static void __exit exit_nls_macgreek(void)
module_init(init_nls_macgreek)
module_exit(exit_nls_macgreek)
+MODULE_DESCRIPTION("NLS Codepage macgreek");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-iceland.c b/fs/nls/mac-iceland.c
index cbd0875c6d69..0bba83f9d415 100644
--- a/fs/nls/mac-iceland.c
+++ b/fs/nls/mac-iceland.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_maciceland(void)
module_init(init_nls_maciceland)
module_exit(exit_nls_maciceland)
+MODULE_DESCRIPTION("NLS Codepage maciceland");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-inuit.c b/fs/nls/mac-inuit.c
index fba8357aaf03..493386832dfd 100644
--- a/fs/nls/mac-inuit.c
+++ b/fs/nls/mac-inuit.c
@@ -528,4 +528,5 @@ static void __exit exit_nls_macinuit(void)
module_init(init_nls_macinuit)
module_exit(exit_nls_macinuit)
+MODULE_DESCRIPTION("NLS Codepage macinuit");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-roman.c b/fs/nls/mac-roman.c
index b6a98a5208cd..d3c082173c20 100644
--- a/fs/nls/mac-roman.c
+++ b/fs/nls/mac-roman.c
@@ -633,4 +633,5 @@ static void __exit exit_nls_macroman(void)
module_init(init_nls_macroman)
module_exit(exit_nls_macroman)
+MODULE_DESCRIPTION("NLS Codepage macroman");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-romanian.c b/fs/nls/mac-romanian.c
index 25547f023638..a7735852f2d5 100644
--- a/fs/nls/mac-romanian.c
+++ b/fs/nls/mac-romanian.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_macromanian(void)
module_init(init_nls_macromanian)
module_exit(exit_nls_macromanian)
+MODULE_DESCRIPTION("NLS Codepage macromanian");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/mac-turkish.c b/fs/nls/mac-turkish.c
index b5454bc7b7fa..d77e9b6b7d7c 100644
--- a/fs/nls/mac-turkish.c
+++ b/fs/nls/mac-turkish.c
@@ -598,4 +598,5 @@ static void __exit exit_nls_macturkish(void)
module_init(init_nls_macturkish)
module_exit(exit_nls_macturkish)
+MODULE_DESCRIPTION("NLS Codepage macturkish");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_ascii.c b/fs/nls/nls_ascii.c
index a2620650d5e4..068143d71284 100644
--- a/fs/nls/nls_ascii.c
+++ b/fs/nls/nls_ascii.c
@@ -163,4 +163,5 @@ static void __exit exit_nls_ascii(void)
module_init(init_nls_ascii)
module_exit(exit_nls_ascii)
+MODULE_DESCRIPTION("NLS ASCII (United States)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index a026dbd3593f..18d597e49a19 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -545,4 +545,5 @@ EXPORT_SYMBOL(unload_nls);
EXPORT_SYMBOL(load_nls);
EXPORT_SYMBOL(load_nls_default);
+MODULE_DESCRIPTION("Base file system native language support");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp1250.c b/fs/nls/nls_cp1250.c
index ace3e19d3407..e22a57a4b828 100644
--- a/fs/nls/nls_cp1250.c
+++ b/fs/nls/nls_cp1250.c
@@ -343,4 +343,5 @@ static void __exit exit_nls_cp1250(void)
module_init(init_nls_cp1250)
module_exit(exit_nls_cp1250)
+MODULE_DESCRIPTION("NLS Windows CP1250 (Slavic/Central European Languages)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp1251.c b/fs/nls/nls_cp1251.c
index 9273ddfd08a1..6f46d339f23c 100644
--- a/fs/nls/nls_cp1251.c
+++ b/fs/nls/nls_cp1251.c
@@ -298,4 +298,5 @@ static void __exit exit_nls_cp1251(void)
module_init(init_nls_cp1251)
module_exit(exit_nls_cp1251)
+MODULE_DESCRIPTION("NLS Windows CP1251 (Bulgarian, Belarusian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp1255.c b/fs/nls/nls_cp1255.c
index 1caf5dfed85b..299e089d4301 100644
--- a/fs/nls/nls_cp1255.c
+++ b/fs/nls/nls_cp1255.c
@@ -380,5 +380,6 @@ static void __exit exit_nls_cp1255(void)
module_init(init_nls_cp1255)
module_exit(exit_nls_cp1255)
+MODULE_DESCRIPTION("NLS Hebrew charsets (ISO-8859-8, CP1255)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(iso8859-8);
diff --git a/fs/nls/nls_cp437.c b/fs/nls/nls_cp437.c
index 7ddb830da3fd..ab880499ea32 100644
--- a/fs/nls/nls_cp437.c
+++ b/fs/nls/nls_cp437.c
@@ -384,4 +384,5 @@ static void __exit exit_nls_cp437(void)
module_init(init_nls_cp437)
module_exit(exit_nls_cp437)
+MODULE_DESCRIPTION("NLS Codepage 437 (United States, Canada)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp737.c b/fs/nls/nls_cp737.c
index c593f683a0cd..5c37618296e9 100644
--- a/fs/nls/nls_cp737.c
+++ b/fs/nls/nls_cp737.c
@@ -347,4 +347,5 @@ static void __exit exit_nls_cp737(void)
module_init(init_nls_cp737)
module_exit(exit_nls_cp737)
+MODULE_DESCRIPTION("NLS Codepage 737 (Greek)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp775.c b/fs/nls/nls_cp775.c
index 554c863745f2..51ccc908901f 100644
--- a/fs/nls/nls_cp775.c
+++ b/fs/nls/nls_cp775.c
@@ -316,4 +316,5 @@ static void __exit exit_nls_cp775(void)
module_init(init_nls_cp775)
module_exit(exit_nls_cp775)
+MODULE_DESCRIPTION("NLS Codepage 775 (Baltic Rim)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp850.c b/fs/nls/nls_cp850.c
index 56cccd14b40b..5f9b9507a8b6 100644
--- a/fs/nls/nls_cp850.c
+++ b/fs/nls/nls_cp850.c
@@ -312,4 +312,5 @@ static void __exit exit_nls_cp850(void)
module_init(init_nls_cp850)
module_exit(exit_nls_cp850)
+MODULE_DESCRIPTION("NLS Codepage 850 (Europe)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp852.c b/fs/nls/nls_cp852.c
index 7cdc05ac1d40..fc513a5e8358 100644
--- a/fs/nls/nls_cp852.c
+++ b/fs/nls/nls_cp852.c
@@ -334,4 +334,5 @@ static void __exit exit_nls_cp852(void)
module_init(init_nls_cp852)
module_exit(exit_nls_cp852)
+MODULE_DESCRIPTION("NLS Codepage 852 (Central/Eastern Europe)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp855.c b/fs/nls/nls_cp855.c
index 7426eea05663..a43be58adb36 100644
--- a/fs/nls/nls_cp855.c
+++ b/fs/nls/nls_cp855.c
@@ -296,4 +296,5 @@ static void __exit exit_nls_cp855(void)
module_init(init_nls_cp855)
module_exit(exit_nls_cp855)
+MODULE_DESCRIPTION("NLS Codepage 855 (Cyrillic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp857.c b/fs/nls/nls_cp857.c
index 098309733ebd..772cd4195bad 100644
--- a/fs/nls/nls_cp857.c
+++ b/fs/nls/nls_cp857.c
@@ -298,4 +298,5 @@ static void __exit exit_nls_cp857(void)
module_init(init_nls_cp857)
module_exit(exit_nls_cp857)
+MODULE_DESCRIPTION("NLS Codepage 857 (Turkish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp860.c b/fs/nls/nls_cp860.c
index 84224478e731..36cf4ca11966 100644
--- a/fs/nls/nls_cp860.c
+++ b/fs/nls/nls_cp860.c
@@ -361,4 +361,5 @@ static void __exit exit_nls_cp860(void)
module_init(init_nls_cp860)
module_exit(exit_nls_cp860)
+MODULE_DESCRIPTION("NLS Codepage 860 (Portuguese)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp861.c b/fs/nls/nls_cp861.c
index dc873e4be092..b7397d079f8f 100644
--- a/fs/nls/nls_cp861.c
+++ b/fs/nls/nls_cp861.c
@@ -384,4 +384,5 @@ static void __exit exit_nls_cp861(void)
module_init(init_nls_cp861)
module_exit(exit_nls_cp861)
+MODULE_DESCRIPTION("NLS Codepage 861 (Icelandic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp862.c b/fs/nls/nls_cp862.c
index d5263e3c5566..fd3b95d1e95d 100644
--- a/fs/nls/nls_cp862.c
+++ b/fs/nls/nls_cp862.c
@@ -418,4 +418,5 @@ static void __exit exit_nls_cp862(void)
module_init(init_nls_cp862)
module_exit(exit_nls_cp862)
+MODULE_DESCRIPTION("NLS Codepage 862 (Hebrew)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp863.c b/fs/nls/nls_cp863.c
index 051c9832e36a..813ae7944249 100644
--- a/fs/nls/nls_cp863.c
+++ b/fs/nls/nls_cp863.c
@@ -378,4 +378,5 @@ static void __exit exit_nls_cp863(void)
module_init(init_nls_cp863)
module_exit(exit_nls_cp863)
+MODULE_DESCRIPTION("NLS Codepage 863 (Canadian French)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp864.c b/fs/nls/nls_cp864.c
index 97eb1273b2f7..d9eb6d5cd47a 100644
--- a/fs/nls/nls_cp864.c
+++ b/fs/nls/nls_cp864.c
@@ -404,4 +404,5 @@ static void __exit exit_nls_cp864(void)
module_init(init_nls_cp864)
module_exit(exit_nls_cp864)
+MODULE_DESCRIPTION("NLS Codepage 864 (Arabic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp865.c b/fs/nls/nls_cp865.c
index 111214228525..2678ffd98bb6 100644
--- a/fs/nls/nls_cp865.c
+++ b/fs/nls/nls_cp865.c
@@ -384,4 +384,5 @@ static void __exit exit_nls_cp865(void)
module_init(init_nls_cp865)
module_exit(exit_nls_cp865)
+MODULE_DESCRIPTION("NLS Codepage 865 (Norwegian, Danish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp866.c b/fs/nls/nls_cp866.c
index ffdcbc3fc38d..7e93d0a3802a 100644
--- a/fs/nls/nls_cp866.c
+++ b/fs/nls/nls_cp866.c
@@ -302,4 +302,5 @@ static void __exit exit_nls_cp866(void)
module_init(init_nls_cp866)
module_exit(exit_nls_cp866)
+MODULE_DESCRIPTION("NLS Codepage 866 (Cyrillic/Russian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp869.c b/fs/nls/nls_cp869.c
index 3b5a34589354..4491737dd5cb 100644
--- a/fs/nls/nls_cp869.c
+++ b/fs/nls/nls_cp869.c
@@ -312,4 +312,5 @@ static void __exit exit_nls_cp869(void)
module_init(init_nls_cp869)
module_exit(exit_nls_cp869)
+MODULE_DESCRIPTION("NLS Codepage 869 (Greek)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_cp874.c b/fs/nls/nls_cp874.c
index 8dfaa10710fa..4fcfbf8ca72c 100644
--- a/fs/nls/nls_cp874.c
+++ b/fs/nls/nls_cp874.c
@@ -271,5 +271,6 @@ static void __exit exit_nls_cp874(void)
module_init(init_nls_cp874)
module_exit(exit_nls_cp874)
+MODULE_DESCRIPTION("NLS Thai charset (CP874, TIS-620)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(tis-620);
diff --git a/fs/nls/nls_cp932.c b/fs/nls/nls_cp932.c
index 67b7398e8483..e5e6270fcca6 100644
--- a/fs/nls/nls_cp932.c
+++ b/fs/nls/nls_cp932.c
@@ -7929,5 +7929,6 @@ static void __exit exit_nls_cp932(void)
module_init(init_nls_cp932)
module_exit(exit_nls_cp932)
+MODULE_DESCRIPTION("NLS Japanese charset (Shift-JIS)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(sjis);
diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c
index c96546cfec9f..91d0a15fd7f9 100644
--- a/fs/nls/nls_cp936.c
+++ b/fs/nls/nls_cp936.c
@@ -11107,5 +11107,6 @@ static void __exit exit_nls_cp936(void)
module_init(init_nls_cp936)
module_exit(exit_nls_cp936)
+MODULE_DESCRIPTION("NLS Simplified Chinese charset (CP936, GB2312)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(gb2312);
diff --git a/fs/nls/nls_cp949.c b/fs/nls/nls_cp949.c
index 199171e97aa4..3ae03c76d59c 100644
--- a/fs/nls/nls_cp949.c
+++ b/fs/nls/nls_cp949.c
@@ -13942,5 +13942,6 @@ static void __exit exit_nls_cp949(void)
module_init(init_nls_cp949)
module_exit(exit_nls_cp949)
+MODULE_DESCRIPTION("NLS Korean charset (CP949, EUC-KR)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(euc-kr);
diff --git a/fs/nls/nls_cp950.c b/fs/nls/nls_cp950.c
index 8e1418708209..e968aa80198d 100644
--- a/fs/nls/nls_cp950.c
+++ b/fs/nls/nls_cp950.c
@@ -9478,5 +9478,6 @@ static void __exit exit_nls_cp950(void)
module_init(init_nls_cp950)
module_exit(exit_nls_cp950)
+MODULE_DESCRIPTION("NLS Traditional Chinese charset (Big5)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(big5);
diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c
index 162b3f160353..0191cc9d955e 100644
--- a/fs/nls/nls_euc-jp.c
+++ b/fs/nls/nls_euc-jp.c
@@ -577,4 +577,5 @@ static void __exit exit_nls_euc_jp(void)
module_init(init_nls_euc_jp)
module_exit(exit_nls_euc_jp)
+MODULE_DESCRIPTION("NLS Japanese charset (EUC-JP)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-1.c b/fs/nls/nls_iso8859-1.c
index 69ac020d43b1..a181be488f7d 100644
--- a/fs/nls/nls_iso8859-1.c
+++ b/fs/nls/nls_iso8859-1.c
@@ -254,4 +254,5 @@ static void __exit exit_nls_iso8859_1(void)
module_init(init_nls_iso8859_1)
module_exit(exit_nls_iso8859_1)
+MODULE_DESCRIPTION("NLS ISO 8859-1 (Latin 1; Western European Languages)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-13.c b/fs/nls/nls_iso8859-13.c
index afb3f8f275f0..8e2be5bfeaf1 100644
--- a/fs/nls/nls_iso8859-13.c
+++ b/fs/nls/nls_iso8859-13.c
@@ -282,4 +282,5 @@ static void __exit exit_nls_iso8859_13(void)
module_init(init_nls_iso8859_13)
module_exit(exit_nls_iso8859_13)
+MODULE_DESCRIPTION("NLS ISO 8859-13 (Latin 7; Baltic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-14.c b/fs/nls/nls_iso8859-14.c
index 046370f0b6f0..c789eccb8a69 100644
--- a/fs/nls/nls_iso8859-14.c
+++ b/fs/nls/nls_iso8859-14.c
@@ -338,4 +338,5 @@ static void __exit exit_nls_iso8859_14(void)
module_init(init_nls_iso8859_14)
module_exit(exit_nls_iso8859_14)
+MODULE_DESCRIPTION("NLS ISO 8859-14 (Latin 8; Celtic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-15.c b/fs/nls/nls_iso8859-15.c
index 7e34a841a056..ffec649176fb 100644
--- a/fs/nls/nls_iso8859-15.c
+++ b/fs/nls/nls_iso8859-15.c
@@ -304,4 +304,5 @@ static void __exit exit_nls_iso8859_15(void)
module_init(init_nls_iso8859_15)
module_exit(exit_nls_iso8859_15)
+MODULE_DESCRIPTION("NLS ISO 8859-15 (Latin 9; Western European Languages with Euro)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-2.c b/fs/nls/nls_iso8859-2.c
index 7dd571181741..d352334d0314 100644
--- a/fs/nls/nls_iso8859-2.c
+++ b/fs/nls/nls_iso8859-2.c
@@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_2(void)
module_init(init_nls_iso8859_2)
module_exit(exit_nls_iso8859_2)
+MODULE_DESCRIPTION("NLS ISO 8859-2 (Latin 2; Slavic/Central European Languages)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-3.c b/fs/nls/nls_iso8859-3.c
index 740b75ec4493..09990e6634d2 100644
--- a/fs/nls/nls_iso8859-3.c
+++ b/fs/nls/nls_iso8859-3.c
@@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_3(void)
module_init(init_nls_iso8859_3)
module_exit(exit_nls_iso8859_3)
+MODULE_DESCRIPTION("NLS ISO 8859-3 (Latin 3; Esperanto, Galician, Maltese, Turkish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-4.c b/fs/nls/nls_iso8859-4.c
index 8826021e32f5..92795224912e 100644
--- a/fs/nls/nls_iso8859-4.c
+++ b/fs/nls/nls_iso8859-4.c
@@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_4(void)
module_init(init_nls_iso8859_4)
module_exit(exit_nls_iso8859_4)
+MODULE_DESCRIPTION("NLS ISO 8859-4 (Latin 4; old Baltic charset)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-5.c b/fs/nls/nls_iso8859-5.c
index 7c04057a1ad8..32309315307a 100644
--- a/fs/nls/nls_iso8859-5.c
+++ b/fs/nls/nls_iso8859-5.c
@@ -269,4 +269,5 @@ static void __exit exit_nls_iso8859_5(void)
module_init(init_nls_iso8859_5)
module_exit(exit_nls_iso8859_5)
+MODULE_DESCRIPTION("NLS ISO 8859-5 (Cyrillic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-6.c b/fs/nls/nls_iso8859-6.c
index d4a881400d74..c18183469d2a 100644
--- a/fs/nls/nls_iso8859-6.c
+++ b/fs/nls/nls_iso8859-6.c
@@ -260,4 +260,5 @@ static void __exit exit_nls_iso8859_6(void)
module_init(init_nls_iso8859_6)
module_exit(exit_nls_iso8859_6)
+MODULE_DESCRIPTION("NLS ISO 8859-6 (Arabic)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-7.c b/fs/nls/nls_iso8859-7.c
index 37b75d825a75..3652d6832864 100644
--- a/fs/nls/nls_iso8859-7.c
+++ b/fs/nls/nls_iso8859-7.c
@@ -314,4 +314,5 @@ static void __exit exit_nls_iso8859_7(void)
module_init(init_nls_iso8859_7)
module_exit(exit_nls_iso8859_7)
+MODULE_DESCRIPTION("NLS ISO 8859-7 (Modern Greek)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_iso8859-9.c b/fs/nls/nls_iso8859-9.c
index 557b98250d37..11a67834b855 100644
--- a/fs/nls/nls_iso8859-9.c
+++ b/fs/nls/nls_iso8859-9.c
@@ -269,4 +269,5 @@ static void __exit exit_nls_iso8859_9(void)
module_init(init_nls_iso8859_9)
module_exit(exit_nls_iso8859_9)
+MODULE_DESCRIPTION("NLS ISO 8859-9 (Latin 5; Turkish)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_koi8-r.c b/fs/nls/nls_koi8-r.c
index 811f232fccfb..e3dca27a3803 100644
--- a/fs/nls/nls_koi8-r.c
+++ b/fs/nls/nls_koi8-r.c
@@ -320,4 +320,5 @@ static void __exit exit_nls_koi8_r(void)
module_init(init_nls_koi8_r)
module_exit(exit_nls_koi8_r)
+MODULE_DESCRIPTION("NLS KOI8-R (Russian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_koi8-ru.c b/fs/nls/nls_koi8-ru.c
index a80a741a8676..07afcd9e58c0 100644
--- a/fs/nls/nls_koi8-ru.c
+++ b/fs/nls/nls_koi8-ru.c
@@ -79,4 +79,5 @@ static void __exit exit_nls_koi8_ru(void)
module_init(init_nls_koi8_ru)
module_exit(exit_nls_koi8_ru)
+MODULE_DESCRIPTION("NLS KOI8-RU (Belarusian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_koi8-u.c b/fs/nls/nls_koi8-u.c
index 7e029e4c188a..f60645758c1a 100644
--- a/fs/nls/nls_koi8-u.c
+++ b/fs/nls/nls_koi8-u.c
@@ -327,4 +327,5 @@ static void __exit exit_nls_koi8_u(void)
module_init(init_nls_koi8_u)
module_exit(exit_nls_koi8_u)
+MODULE_DESCRIPTION("NLS KOI8-U (Ukrainian)");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_ucs2_utils.c b/fs/nls/nls_ucs2_utils.c
index a69781c54dd8..d4564b79d7bf 100644
--- a/fs/nls/nls_ucs2_utils.c
+++ b/fs/nls/nls_ucs2_utils.c
@@ -16,6 +16,7 @@
#include <asm/unaligned.h>
#include "nls_ucs2_utils.h"
+MODULE_DESCRIPTION("NLS UCS-2");
MODULE_LICENSE("GPL");
/*
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index afcfbc4a14db..a0fa0610eaac 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -64,4 +64,5 @@ static void __exit exit_nls_utf8(void)
module_init(init_nls_utf8)
module_exit(exit_nls_utf8)
+MODULE_DESCRIPTION("NLS UTF-8");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 07e22a15ef02..a4a925dce331 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -8,10 +8,12 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
+#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/nsfs.h>
#include <linux/uaccess.h>
+#include "mount.h"
#include "internal.h"
static struct vfsmount *nsfs_mnt;
@@ -82,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task,
return ns_get_path_cb(path, ns_get_path_task, &args);
}
-int open_related_ns(struct ns_common *ns,
- struct ns_common *(*get_ns)(struct ns_common *ns))
+/**
+ * open_namespace - open a namespace
+ * @ns: the namespace to open
+ *
+ * This will consume a reference to @ns independent of success or failure.
+ *
+ * Return: A file descriptor on success or a negative error code on failure.
+ */
+int open_namespace(struct ns_common *ns)
{
- struct path path = {};
- struct ns_common *relative;
+ struct path path __free(path_put) = {};
struct file *f;
int err;
- int fd;
- fd = get_unused_fd_flags(O_CLOEXEC);
+ /* call first to consume reference */
+ err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
+ if (err < 0)
+ return err;
+
+ CLASS(get_unused_fd, fd)(O_CLOEXEC);
if (fd < 0)
return fd;
- relative = get_ns(ns);
- if (IS_ERR(relative)) {
- put_unused_fd(fd);
- return PTR_ERR(relative);
- }
+ f = dentry_open(&path, O_RDONLY, current_cred());
+ if (IS_ERR(f))
+ return PTR_ERR(f);
- err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path);
- if (err < 0) {
- put_unused_fd(fd);
- return err;
- }
+ fd_install(fd, f);
+ return take_fd(fd);
+}
- f = dentry_open(&path, O_RDONLY, current_cred());
- path_put(&path);
- if (IS_ERR(f)) {
- put_unused_fd(fd);
- fd = PTR_ERR(f);
- } else
- fd_install(fd, f);
+int open_related_ns(struct ns_common *ns,
+ struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+ struct ns_common *relative;
+
+ relative = get_ns(ns);
+ if (IS_ERR(relative))
+ return PTR_ERR(relative);
- return fd;
+ return open_namespace(relative);
}
EXPORT_SYMBOL_GPL(open_related_ns);
@@ -123,9 +132,12 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
struct user_namespace *user_ns;
+ struct pid_namespace *pid_ns;
+ struct task_struct *tsk;
struct ns_common *ns = get_proc_ns(file_inode(filp));
uid_t __user *argp;
uid_t uid;
+ int ret;
switch (ioctl) {
case NS_GET_USERNS:
@@ -143,9 +155,69 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
argp = (uid_t __user *) arg;
uid = from_kuid_munged(current_user_ns(), user_ns->owner);
return put_user(uid, argp);
+ case NS_GET_MNTNS_ID: {
+ struct mnt_namespace *mnt_ns;
+ __u64 __user *idp;
+ __u64 id;
+
+ if (ns->ops->type != CLONE_NEWNS)
+ return -EINVAL;
+
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ idp = (__u64 __user *)arg;
+ id = mnt_ns->seq;
+ return put_user(id, idp);
+ }
+ case NS_GET_PID_FROM_PIDNS:
+ fallthrough;
+ case NS_GET_TGID_FROM_PIDNS:
+ fallthrough;
+ case NS_GET_PID_IN_PIDNS:
+ fallthrough;
+ case NS_GET_TGID_IN_PIDNS:
+ if (ns->ops->type != CLONE_NEWPID)
+ return -EINVAL;
+
+ ret = -ESRCH;
+ pid_ns = container_of(ns, struct pid_namespace, ns);
+
+ rcu_read_lock();
+
+ if (ioctl == NS_GET_PID_IN_PIDNS ||
+ ioctl == NS_GET_TGID_IN_PIDNS)
+ tsk = find_task_by_vpid(arg);
+ else
+ tsk = find_task_by_pid_ns(arg, pid_ns);
+ if (!tsk)
+ break;
+
+ switch (ioctl) {
+ case NS_GET_PID_FROM_PIDNS:
+ ret = task_pid_vnr(tsk);
+ break;
+ case NS_GET_TGID_FROM_PIDNS:
+ ret = task_tgid_vnr(tsk);
+ break;
+ case NS_GET_PID_IN_PIDNS:
+ ret = task_pid_nr_ns(tsk, pid_ns);
+ break;
+ case NS_GET_TGID_IN_PIDNS:
+ ret = task_tgid_nr_ns(tsk, pid_ns);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ rcu_read_unlock();
+
+ if (!ret)
+ ret = -ESRCH;
+ break;
default:
- return -ENOTTY;
+ ret = -ENOTTY;
}
+
+ return ret;
}
int ns_get_name(char *buf, size_t size, struct task_struct *task,
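From userspace, the pid-translation ioctls added above are used roughly as follows. This is a sketch: the NS_GET_* request macros are the ones this series adds to <linux/nsfs.h>, and error handling is abbreviated.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/nsfs.h>

	int main(void)
	{
		int fd = open("/proc/self/ns/pid", O_RDONLY);
		/* Translate pid 1 of that namespace into the caller's view. */
		int vnr = ioctl(fd, NS_GET_PID_FROM_PIDNS, 1);

		if (vnr < 0)
			perror("NS_GET_PID_FROM_PIDNS");
		else
			printf("pid 1 there is pid %d here\n", vnr);
		return 0;
	}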
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 27fbde2701b6..c5b688c5f984 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -259,8 +259,8 @@ enum Opt {
// clang-format off
static const struct fs_parameter_spec ntfs_fs_parameters[] = {
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("gid", Opt_gid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_gid("gid", Opt_gid),
fsparam_u32oct("umask", Opt_umask),
fsparam_u32oct("dmask", Opt_dmask),
fsparam_u32oct("fmask", Opt_fmask),
@@ -319,14 +319,10 @@ static int ntfs_fs_parse_param(struct fs_context *fc,
switch (opt) {
case Opt_uid:
- opts->fs_uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(opts->fs_uid))
- return invalf(fc, "ntfs3: Invalid value for uid.");
+ opts->fs_uid = result.uid;
break;
case Opt_gid:
- opts->fs_gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(opts->fs_gid))
- return invalf(fc, "ntfs3: Invalid value for gid.");
+ opts->fs_gid = result.gid;
break;
case Opt_umask:
if (result.uint_32 & ~07777)
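The fsparam_uid()/fsparam_gid() specs used above shift the make_kuid()/make_kgid() conversion and validity checking into the mount API parser, which hands the filesystem ready-made values in result.uid and result.gid. A hedged sketch of the pattern for a hypothetical filesystem (example_ctx and the Opt_* constants are stand-ins):

	#include <linux/fs_context.h>
	#include <linux/fs_parser.h>

	static const struct fs_parameter_spec example_fs_parameters[] = {
		fsparam_uid("uid", Opt_uid),
		fsparam_gid("gid", Opt_gid),
		{}
	};

	static int example_parse_param(struct fs_context *fc,
				       struct fs_parameter *param)
	{
		struct example_ctx *opts = fc->fs_private;
		struct fs_parse_result result;
		int opt;

		opt = fs_parse(fc, example_fs_parameters, param, &result);
		if (opt < 0)
			return opt;

		switch (opt) {
		case Opt_uid:
			opts->fs_uid = result.uid;	/* validated kuid_t */
			break;
		case Opt_gid:
			opts->fs_gid = result.gid;	/* validated kgid_t */
			break;
		}
		return 0;
	}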
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f0467d3b3c88..6be175a1ab3c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2366,6 +2366,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
}
list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+ ret = ocfs2_assure_trans_credits(handle, credits);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_mark_extent_written(inode, &et, handle,
ue->ue_cpos, 1,
ue->ue_phys,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 604fea3a26ff..530fba34f6d3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -446,6 +446,23 @@ bail:
}
/*
+ * Make sure the handle has at least 'nblocks' credits available. If it does
+ * not have that many credits available, we will try to extend the handle to
+ * have enough credits. If that fails, we will restart the transaction to
+ * have enough credits. Similar notes regarding data consistency and locking
+ * implications as for ocfs2_extend_trans() apply here.
+ */
+int ocfs2_assure_trans_credits(handle_t *handle, int nblocks)
+{
+ int old_nblks = jbd2_handle_buffer_credits(handle);
+
+ trace_ocfs2_assure_trans_credits(old_nblks);
+ if (old_nblks >= nblocks)
+ return 0;
+ return ocfs2_extend_trans(handle, nblocks - old_nblks);
+}
+
+/*
* If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
* If that fails, restart the transaction & regain write access for the
* buffer head which is used for metadata modifications.
@@ -479,12 +496,6 @@ bail:
return status;
}
-
-struct ocfs2_triggers {
- struct jbd2_buffer_trigger_type ot_triggers;
- int ot_offset;
-};
-
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
@@ -548,85 +559,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh)
{
+ struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
+
mlog(ML_ERROR,
"ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
"bh->b_blocknr = %llu\n",
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
- ocfs2_error(bh->b_assoc_map->host->i_sb,
+ ocfs2_error(ot->sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
-static struct ocfs2_triggers di_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dinode, i_check),
-};
-
-static struct ocfs2_triggers eb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
-};
-
-static struct ocfs2_triggers rb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
-};
-
-static struct ocfs2_triggers gd_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
-};
-
-static struct ocfs2_triggers db_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_db_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+static void ocfs2_setup_csum_triggers(struct super_block *sb,
+ enum ocfs2_journal_trigger_type type,
+ struct ocfs2_triggers *ot)
+{
+ BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT);
-static struct ocfs2_triggers xb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
-};
+ switch (type) {
+ case OCFS2_JTR_DI:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dinode, i_check);
+ break;
+ case OCFS2_JTR_EB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check);
+ break;
+ case OCFS2_JTR_RB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check);
+ break;
+ case OCFS2_JTR_GD:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check);
+ break;
+ case OCFS2_JTR_DB:
+ ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger;
+ break;
+ case OCFS2_JTR_XB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check);
+ break;
+ case OCFS2_JTR_DQ:
+ ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger;
+ break;
+ case OCFS2_JTR_DR:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check);
+ break;
+ case OCFS2_JTR_DL:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check);
+ break;
+ case OCFS2_JTR_NONE:
+ /* To make compiler happy... */
+ return;
+ }
-static struct ocfs2_triggers dq_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_dq_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+ ot->ot_triggers.t_abort = ocfs2_abort_trigger;
+ ot->sb = sb;
+}
-static struct ocfs2_triggers dr_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
-};
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[])
+{
+ enum ocfs2_journal_trigger_type type;
-static struct ocfs2_triggers dl_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
-};
+ for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++)
+ ocfs2_setup_csum_triggers(sb, type, &triggers[type]);
+}
static int __ocfs2_journal_access(handle_t *handle,
struct ocfs2_caching_info *ci,
@@ -708,56 +710,91 @@ static int __ocfs2_journal_access(handle_t *handle,
int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DI],
+ type);
}
int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_EB],
+ type);
}
int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_RB],
type);
}
int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_GD],
+ type);
}
int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DB],
+ type);
}
int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_XB],
+ type);
}
int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DQ],
+ type);
}
int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DR],
+ type);
}
int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DL],
+ type);
}
int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
@@ -778,13 +815,15 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
if (!is_handle_aborted(handle)) {
journal_t *journal = handle->h_transaction->t_journal;
- mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
- "Aborting transaction and journal.\n");
+ mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: "
+ "handle type %u started at line %u, credits %u/%u "
+ "errcode %d. Aborting transaction and journal.\n",
+ handle->h_type, handle->h_line_no,
+ handle->h_requested_credits,
+ jbd2_handle_buffer_credits(handle), status);
handle->h_err = status;
jbd2_journal_abort_handle(handle);
jbd2_journal_abort(journal, status);
- ocfs2_abort(bh->b_assoc_map->host->i_sb,
- "Journal already aborted.\n");
}
}
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 41c9fe7e62f9..e3c3a35dc5e0 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -243,6 +243,8 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb,
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle);
int ocfs2_extend_trans(handle_t *handle, int nblocks);
+int ocfs2_assure_trans_credits(handle_t *handle,
+ int nblocks);
int ocfs2_allocate_extend_trans(handle_t *handle,
int thresh);
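The new ocfs2_assure_trans_credits() is meant for loops that journal many small updates under a single handle, as in the aops.c hunk earlier in this diff: top the handle up before each iteration instead of sizing the whole transaction up front. An illustrative fragment (example_mark_one() is a stand-in, not ocfs2 code):

	list_for_each_entry(item, &pending, node) {
		/* Extends, or restarts, the transaction as needed. */
		ret = ocfs2_assure_trans_credits(handle, credits_per_item);
		if (ret < 0)
			break;
		ret = example_mark_one(handle, item);
		if (ret < 0)
			break;
	}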
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a503c553bab2..8fe826143d7b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -284,6 +284,30 @@ enum ocfs2_mount_options
#define OCFS2_OSB_ERROR_FS 0x0004
#define OCFS2_DEFAULT_ATIME_QUANTUM 60
+struct ocfs2_triggers {
+ struct jbd2_buffer_trigger_type ot_triggers;
+ int ot_offset;
+ struct super_block *sb;
+};
+
+enum ocfs2_journal_trigger_type {
+ OCFS2_JTR_DI,
+ OCFS2_JTR_EB,
+ OCFS2_JTR_RB,
+ OCFS2_JTR_GD,
+ OCFS2_JTR_DB,
+ OCFS2_JTR_XB,
+ OCFS2_JTR_DQ,
+ OCFS2_JTR_DR,
+ OCFS2_JTR_DL,
+ OCFS2_JTR_NONE /* This must be the last entry */
+};
+
+#define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE
+
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[]);
+
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
@@ -351,6 +375,9 @@ struct ocfs2_super
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
+ /* Journal triggers for checksum */
+ struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT];
+
struct delayed_work la_enable_wq;
/*
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 60e208b01c8d..0511c69c9fde 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2577,6 +2577,8 @@ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
+DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits);
+
DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8aabaed2c1cb..afee70125ae3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1075,9 +1075,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
osb, &ocfs2_osb_debug_fops);
- if (ocfs2_meta_ecc(osb))
+ if (ocfs2_meta_ecc(osb)) {
+ ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers);
ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
osb->osb_debug_root);
+ }
status = ocfs2_mount_volume(sb);
if (status < 0)
diff --git a/fs/open.c b/fs/open.c
index 89cafb572061..22adbef7ecc2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -202,13 +202,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
return error;
}
-SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
+SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
+COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
@@ -247,6 +247,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
long ret;
+ loff_t sum;
if (offset < 0 || len <= 0)
return -EINVAL;
@@ -319,8 +320,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
return -ENODEV;
- /* Check for wrap through zero too */
- if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
+ /* Check for wraparound */
+ if (check_add_overflow(offset, len, &sum))
+ return -EFBIG;
+
+ if (sum > inode->i_sb->s_maxbytes)
return -EFBIG;
if (!file->f_op->fallocate)
@@ -982,12 +986,11 @@ static int do_dentry_open(struct file *f,
*/
if (f->f_mode & FMODE_WRITE) {
/*
- * Paired with smp_mb() in collapse_file() to ensure nr_thps
- * is up to date and the update to i_writecount by
- * get_write_access() is visible. Ensures subsequent insertion
- * of THPs into the page cache will fail.
+ * Depends on full fence from get_write_access() to synchronize
+ * against collapse_file() regarding i_writecount and nr_thps
+ * updates. Ensures subsequent insertion of THPs into the page
+ * cache will fail.
*/
- smp_mb();
if (filemap_nr_thps(inode->i_mapping)) {
struct address_space *mapping = inode->i_mapping;
@@ -1004,11 +1007,6 @@ static int do_dentry_open(struct file *f,
}
}
- /*
- * Once we return a file with FMODE_OPENED, __fput() will call
- * fsnotify_close(), so we need fsnotify_open() here for symmetry.
- */
- fsnotify_open(f);
return 0;
cleanup_all:
@@ -1085,8 +1083,19 @@ EXPORT_SYMBOL(file_path);
*/
int vfs_open(const struct path *path, struct file *file)
{
+ int ret;
+
file->f_path = *path;
- return do_dentry_open(file, NULL);
+ ret = do_dentry_open(file, NULL);
+ if (!ret) {
+ /*
+ * Once we return a file with FMODE_OPENED, __fput() will call
+ * fsnotify_close(), so we need fsnotify_open() here for
+ * symmetry.
+ */
+ fsnotify_open(file);
+ }
+ return ret;
}
struct file *dentry_open(const struct path *path, int flags,
@@ -1177,8 +1186,10 @@ struct file *kernel_file_open(const struct path *path, int flags,
error = do_dentry_open(f, NULL);
if (error) {
fput(f);
- f = ERR_PTR(error);
+ return ERR_PTR(error);
}
+
+ fsnotify_open(f);
return f;
}
EXPORT_SYMBOL_GPL(kernel_file_open);
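check_add_overflow() in the vfs_fallocate() hunk is the generic helper from <linux/overflow.h>; it reports wraparound directly instead of relying on the old sign test. A standalone userspace illustration via the compiler builtin that the helper wraps:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int64_t offset = INT64_MAX - 4, len = 8, sum;

		if (__builtin_add_overflow(offset, len, &sum))
			puts("offset + len wraps: reject with -EFBIG");
		else
			printf("sum = %lld\n", (long long)sum);
		return 0;
	}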
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a7b527ea50d3..26ecda0e4d19 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -471,4 +471,5 @@ static void __exit exit_openprom_fs(void)
module_init(init_openprom_fs)
module_exit(exit_openprom_fs)
+MODULE_DESCRIPTION("OpenPROM filesystem support");
MODULE_LICENSE("GPL");
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 085912268442..fdb9b65db1de 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -56,7 +56,6 @@ static int orangefs_writepage_locked(struct page *page,
ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
len, wr, NULL, NULL);
if (ret < 0) {
- SetPageError(page);
mapping_set_error(page->mapping, ret);
} else {
ret = 0;
@@ -119,7 +118,6 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
0, &wr, NULL, NULL);
if (ret < 0) {
for (i = 0; i < ow->npages; i++) {
- SetPageError(ow->pages[i]);
mapping_set_error(ow->pages[i]->mapping, ret);
if (PagePrivate(ow->pages[i])) {
wrp = (struct orangefs_write_range *)
@@ -303,15 +301,10 @@ static int orangefs_read_folio(struct file *file, struct folio *folio)
iov_iter_zero(~0U, &iter);
/* takes care of potential aliasing */
flush_dcache_folio(folio);
- if (ret < 0) {
- folio_set_error(folio);
- } else {
- folio_mark_uptodate(folio);
+ if (ret > 0)
ret = 0;
- }
- /* unlock the folio after the ->read_folio() routine completes */
- folio_unlock(folio);
- return ret;
+ folio_end_read(folio, ret == 0);
+ return ret;
}
static int orangefs_write_begin(struct file *file,
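folio_end_read(), adopted above, merges the mark-uptodate and unlock steps that ->read_folio() implementations used to perform separately. The shape of the conversion, as a hedged before/after fragment:

	/* Before: */
	if (ret == 0)
		folio_mark_uptodate(folio);
	folio_unlock(folio);

	/* After: one call that sets (or skips) uptodate and unlocks. */
	folio_end_read(folio, ret == 0);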
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index b501dc07f922..edcca4beb765 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -274,10 +274,8 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
gossip_err("orangefs error: asked for %d pages, only got %d.\n",
bufmap->page_count, ret);
- for (i = 0; i < ret; i++) {
- SetPageError(bufmap->page_array[i]);
+ for (i = 0; i < ret; i++)
unpin_user_page(bufmap->page_array[i]);
- }
return -ENOMEM;
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 116f542442dd..ab65e98a1def 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1314,10 +1314,6 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
int flags = file->f_flags | OVL_OPEN_FLAGS;
int err;
- err = ovl_copy_up(dentry->d_parent);
- if (err)
- return err;
-
old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_setup_cred_for_create(dentry, inode, mode, old_cred);
if (err)
@@ -1360,6 +1356,10 @@ static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
if (!OVL_FS(dentry->d_sb)->tmpfile)
return -EOPNOTSUPP;
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ return err;
+
err = ovl_want_write(dentry);
if (err)
return err;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 063409069f56..5868cb222955 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -181,6 +181,10 @@ static int ovl_check_encode_origin(struct dentry *dentry)
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
bool decodable = ofs->config.nfs_export;
+ /* No upper layer? */
+ if (!ovl_upper_mnt(ofs))
+ return 1;
+
/* Lower file handle for non-upper non-decodable */
if (!ovl_dentry_upper(dentry) && !decodable)
return 1;
@@ -209,7 +213,7 @@ static int ovl_check_encode_origin(struct dentry *dentry)
* ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
- if (d_is_dir(dentry) && ovl_upper_mnt(ofs) && decodable)
+ if (d_is_dir(dentry) && decodable)
return ovl_connect_layer(dentry);
/* Lower file handle for indexed and non-upper dir/non-dir */
diff --git a/fs/pidfs.c b/fs/pidfs.c
index dbb9d854d1c5..c9cb14181def 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -11,10 +11,16 @@
#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/pseudo_fs.h>
+#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <uapi/linux/pidfd.h>
+#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
#include "internal.h"
+#include "mount.h"
#ifdef CONFIG_PROC_FS
/**
@@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
return poll_flags;
}
+static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct task_struct *task __free(put_task) = NULL;
+ struct nsproxy *nsp __free(put_nsproxy) = NULL;
+ struct pid *pid = pidfd_pid(file);
+ struct ns_common *ns_common;
+
+ if (arg)
+ return -EINVAL;
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ return -ESRCH;
+
+ scoped_guard(task_lock, task) {
+ nsp = task->nsproxy;
+ if (nsp)
+ get_nsproxy(nsp);
+ }
+ if (!nsp)
+ return -ESRCH; /* just pretend it didn't exist */
+
+ /*
+ * We're trying to open a file descriptor to the namespace so perform a
+ * filesystem cred ptrace check. Also, we mirror nsfs behavior.
+ */
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ return -EACCES;
+
+ switch (cmd) {
+ /* Namespaces that hang off nsproxy. */
+ case PIDFD_GET_CGROUP_NAMESPACE:
+ get_cgroup_ns(nsp->cgroup_ns);
+ ns_common = to_ns_common(nsp->cgroup_ns);
+ break;
+ case PIDFD_GET_IPC_NAMESPACE:
+ get_ipc_ns(nsp->ipc_ns);
+ ns_common = to_ns_common(nsp->ipc_ns);
+ break;
+ case PIDFD_GET_MNT_NAMESPACE:
+ get_mnt_ns(nsp->mnt_ns);
+ ns_common = to_ns_common(nsp->mnt_ns);
+ break;
+ case PIDFD_GET_NET_NAMESPACE:
+ ns_common = to_ns_common(nsp->net_ns);
+ get_net_ns(ns_common);
+ break;
+ case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
+ get_pid_ns(nsp->pid_ns_for_children);
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
+ break;
+ case PIDFD_GET_TIME_NAMESPACE:
+ get_time_ns(nsp->time_ns);
+ ns_common = to_ns_common(nsp->time_ns);
+ break;
+ case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
+ get_time_ns(nsp->time_ns_for_children);
+ ns_common = to_ns_common(nsp->time_ns_for_children);
+ break;
+ case PIDFD_GET_UTS_NAMESPACE:
+ get_uts_ns(nsp->uts_ns);
+ ns_common = to_ns_common(nsp->uts_ns);
+ break;
+ /* Namespaces that don't hang off nsproxy. */
+ case PIDFD_GET_USER_NAMESPACE:
+ rcu_read_lock();
+ ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
+ rcu_read_unlock();
+ break;
+ case PIDFD_GET_PID_NAMESPACE:
+ rcu_read_lock();
+ ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
+ rcu_read_unlock();
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ /* open_namespace() unconditionally consumes the reference */
+ return open_namespace(ns_common);
+}
+
static const struct file_operations pidfs_file_operations = {
.poll = pidfd_poll,
#ifdef CONFIG_PROC_FS
.show_fdinfo = pidfd_show_fdinfo,
#endif
+ .unlocked_ioctl = pidfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
struct pid *pidfd_pid(const struct file *file)
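Userspace reaches the new pidfd ioctls above through an ordinary pidfd. A sketch, assuming the PIDFD_GET_* request macros land in <linux/pidfd.h> as this series intends; error handling is abbreviated:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/pidfd.h>

	int main(void)
	{
		int pidfd = syscall(SYS_pidfd_open, getpid(), 0);
		/* Returns a namespace fd, as if /proc/<pid>/ns/user were opened. */
		int nsfd = ioctl(pidfd, PIDFD_GET_USER_NAMESPACE, 0);

		if (nsfd < 0)
			perror("PIDFD_GET_USER_NAMESPACE");
		else
			printf("user namespace fd: %d\n", nsfd);
		return 0;
	}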
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 775ce0bcf08c..c02f1e63f82d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -202,8 +202,8 @@ int proc_alloc_inum(unsigned int *inum)
{
int i;
- i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1,
- GFP_KERNEL);
+ i = ida_alloc_max(&proc_inum_ida, UINT_MAX - PROC_DYNAMIC_FIRST,
+ GFP_KERNEL);
if (i < 0)
return i;
@@ -213,7 +213,7 @@ int proc_alloc_inum(unsigned int *inum)
void proc_free_inum(unsigned int inum)
{
- ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
+ ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
}
static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags)
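The proc conversion above follows the general migration rule: ida_simple_get(ida, 0, max + 1, gfp) becomes ida_alloc_max(ida, max, gfp), with the upper bound turning inclusive, and ida_simple_remove() becomes ida_free(). A minimal sketch:

	#include <linux/idr.h>

	static DEFINE_IDA(example_ida);

	static int example_alloc_id(void)
	{
		/* Inclusive bound: returns an id in [0, 127], or -errno. */
		return ida_alloc_max(&example_ida, 127, GFP_KERNEL);
	}

	static void example_free_id(int id)
	{
		ida_free(&example_ida, id);
	}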
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f8d35f993fe5..71e5039d940d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -707,6 +707,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
#ifdef CONFIG_X86_USER_SHADOW_STACK
[ilog2(VM_SHADOW_STACK)] = "ss",
#endif
+#ifdef CONFIG_64BIT
+ [ilog2(VM_SEALED)] = "sl",
+#endif
};
size_t i;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 0a808951b7d3..e133b507ddf3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -61,7 +61,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
return security_sb_show_options(m, sb);
}
-static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+static void show_vfsmnt_opts(struct seq_file *m, struct vfsmount *mnt)
{
static const struct proc_fs_opts mnt_opts[] = {
{ MNT_NOSUID, ",nosuid" },
@@ -124,7 +124,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
err = show_sb_opts(m, sb);
if (err)
goto out;
- show_mnt_opts(m, mnt);
+ show_vfsmnt_opts(m, mnt);
if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt_path.dentry);
seq_puts(m, " 0 0\n");
@@ -153,7 +153,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
goto out;
seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
- show_mnt_opts(m, mnt);
+ show_vfsmnt_opts(m, mnt);
/* Tagged fields ("foo:X" or "bar") */
if (IS_MNT_SHARED(r))
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index d79841e94428..e399e2dd3a12 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -430,5 +430,6 @@ static void __exit exit_qnx4_fs(void)
module_init(init_qnx4_fs)
module_exit(exit_qnx4_fs)
+MODULE_DESCRIPTION("QNX4 file system");
MODULE_LICENSE("GPL");
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index d62fbef838b6..4f1735b882b1 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -694,4 +694,5 @@ static void __exit exit_qnx6_fs(void)
module_init(init_qnx6_fs)
module_exit(exit_qnx6_fs)
+MODULE_DESCRIPTION("QNX6 file system");
MODULE_LICENSE("GPL");
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 627eb2f72ef3..a2b256dac36e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2246,9 +2246,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
int cnt;
struct quota_info *dqopt = sb_dqopt(sb);
- /* s_umount should be held in exclusive mode */
- if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
- up_read(&sb->s_umount);
+ rwsem_assert_held_write(&sb->s_umount);
/* Cannot turn off usage accounting without turning off limits, or
* suspend quotas and simultaneously turn quotas off. */
@@ -2510,9 +2508,7 @@ int dquot_resume(struct super_block *sb, int type)
int ret = 0, cnt;
unsigned int flags;
- /* s_umount should be held in exclusive mode */
- if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
- up_read(&sb->s_umount);
+ rwsem_assert_held_write(&sb->s_umount);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
diff --git a/fs/read_write.c b/fs/read_write.c
index ef6339391351..90e283b31ca1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -730,7 +730,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
- ret = kiocb_set_rw_flags(&kiocb, flags);
+ ret = kiocb_set_rw_flags(&kiocb, flags, type);
if (ret)
return ret;
kiocb.ki_pos = (ppos ? *ppos : 0);
@@ -1736,3 +1736,19 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
return 0;
}
+
+bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos)
+{
+ size_t len = iov_iter_count(iter);
+
+ if (!iter_is_ubuf(iter))
+ return false;
+
+ if (!is_power_of_2(len))
+ return false;
+
+ if (!IS_ALIGNED(pos, len))
+ return false;
+
+ return true;
+}
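generic_atomic_write_valid() is the gate for RWF_ATOMIC writes: the iterator must be a single user buffer whose length is a power of two and whose file offset is naturally aligned to that length. A hedged userspace sketch, assuming uapi headers that define RWF_ATOMIC (a single-element iovec is imported as a ubuf iterator, so it passes the iter_is_ubuf() check):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/uio.h>

    int main(void)
    {
            struct iovec iov;
            void *buf;
            int fd = open("testfile", O_RDWR | O_DIRECT);

            if (fd < 0 || posix_memalign(&buf, 4096, 4096))
                    return 1;
            memset(buf, 0xab, 4096);
            iov.iov_base = buf;
            iov.iov_len = 4096;     /* power-of-two length */
            /* offset 8192 is naturally aligned to the 4096-byte length */
            if (pwritev2(fd, &iov, 1, 8192, RWF_ATOMIC) < 0)
                    perror("pwritev2");
            return 0;
    }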
diff --git a/fs/readdir.c b/fs/readdir.c
index 278bc0254732..d6c82421902a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -22,8 +22,6 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
-
/*
* Some filesystems were never converted to '->iterate_shared()'
* and their directory iterators want the inode lock held for
@@ -72,7 +70,7 @@ int wrap_directory_iterator(struct file *file,
EXPORT_SYMBOL(wrap_directory_iterator);
/*
- * Note the "unsafe_put_user() semantics: we goto a
+ * Note the "unsafe_put_user()" semantics: we goto a
* label for errors.
*/
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c1daedc50f4c..9b43a81a6488 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2699,7 +2699,6 @@ fail:
}
bh = bh->b_this_page;
} while (bh != head);
- folio_set_error(folio);
BUG_ON(folio_test_writeback(folio));
folio_start_writeback(folio);
folio_unlock(folio);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2cbb92462074..68758b6fed94 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -101,19 +101,15 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos);
*/
static int romfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
loff_t offset, size;
unsigned long fillsize, pos;
void *buf;
int ret;
- buf = kmap(page);
- if (!buf)
- return -ENOMEM;
+ buf = kmap_local_folio(folio, 0);
- /* 32 bit warning -- but not for us :) */
- offset = page_offset(page);
+ offset = folio_pos(folio);
size = i_size_read(inode);
fillsize = 0;
ret = 0;
@@ -125,20 +121,14 @@ static int romfs_read_folio(struct file *file, struct folio *folio)
ret = romfs_dev_read(inode->i_sb, pos, buf, fillsize);
if (ret < 0) {
- SetPageError(page);
fillsize = 0;
ret = -EIO;
}
}
- if (fillsize < PAGE_SIZE)
- memset(buf + fillsize, 0, PAGE_SIZE - fillsize);
- if (ret == 0)
- SetPageUptodate(page);
-
- flush_dcache_page(page);
- kunmap(page);
- unlock_page(page);
+ buf = folio_zero_tail(folio, fillsize, buf + fillsize);
+ kunmap_local(buf);
+ folio_end_read(folio, ret == 0);
return ret;
}
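The romfs conversion above is an instance of the current idiom for simple read_folio implementations: map with kmap_local_folio(), fill, zero the tail with folio_zero_tail() (which also flushes the dcache and expects the kaddr at the zeroing offset), then finish with folio_end_read(), which marks the folio uptodate on success and unlocks it in one call. A hedged sketch of the generic shape; example_fill() is hypothetical:

    static int example_read_folio(struct file *file, struct folio *folio)
    {
            void *buf = kmap_local_folio(folio, 0);
            size_t filled = 0;
            int err = example_fill(folio->mapping->host, folio_pos(folio),
                                   buf, &filled);

            if (err)
                    filled = 0;
            /* zero from 'filled' to the end, flushing the dcache */
            buf = folio_zero_tail(folio, filled, buf + filled);
            kunmap_local(buf);
            folio_end_read(folio, err == 0);    /* uptodate + unlock */
            return err;
    }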
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index bb86fc0641d8..6397fdefd876 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -134,7 +134,7 @@ module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
module_param(enable_gcm_256, bool, 0644);
-MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0");
+MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/1");
module_param(require_gcm_256, bool, 0644);
MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0");
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 73482734a8d8..a865941724c0 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1494,6 +1494,8 @@ struct cifs_aio_ctx {
struct cifs_io_request {
struct netfs_io_request rreq;
struct cifsFileInfo *cfile;
+ struct TCP_Server_Info *server;
+ pid_t pid;
};
/* asynchronous read support */
@@ -1504,7 +1506,6 @@ struct cifs_io_subrequest {
struct cifs_io_request *req;
};
ssize_t got_bytes;
- pid_t pid;
unsigned int xid;
int result;
bool have_xid;
@@ -1917,8 +1918,8 @@ require use of the stronger protocol */
#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
-#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
-#define CIFSSEC_MAX (CIFSSEC_MUST_NTLMV2)
+#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL)
+#define CIFSSEC_MAX (CIFSSEC_MAY_SIGN | CIFSSEC_MUST_KRB5 | CIFSSEC_MAY_SEAL)
#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
/*
*****************************************************************
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 25e9ab947c17..595c4b673707 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1345,8 +1345,8 @@ cifs_async_readv(struct cifs_io_subrequest *rdata)
if (rc)
return rc;
- smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid);
- smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16));
+ smb->hdr.Pid = cpu_to_le16((__u16)rdata->req->pid);
+ smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->req->pid >> 16));
smb->AndXCommand = 0xFF; /* none */
smb->Fid = rdata->req->cfile->fid.netfid;
@@ -1689,8 +1689,8 @@ cifs_async_writev(struct cifs_io_subrequest *wdata)
if (rc)
goto async_writev_out;
- smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid);
- smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
+ smb->hdr.Pid = cpu_to_le16((__u16)wdata->req->pid);
+ smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->req->pid >> 16));
smb->AndXCommand = 0xFF; /* none */
smb->Fid = wdata->req->cfile->fid.netfid;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 9d5c2440abfc..1374635e89fa 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -134,17 +134,15 @@ fail:
static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
- struct TCP_Server_Info *server;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
+ struct TCP_Server_Info *server = req->server;
struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
size_t rsize = 0;
int rc;
rdata->xid = get_xid();
rdata->have_xid = true;
-
- server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
rdata->server = server;
if (cifs_sb->ctx->rsize == 0)
@@ -179,15 +177,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
struct netfs_io_request *rreq = subreq->rreq;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
- struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
- pid_t pid;
int rc = 0;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
- pid = req->cfile->pid;
- else
- pid = current->tgid; // Ummm... This may be a workqueue
-
cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
__func__, rreq->debug_id, subreq->debug_index, rreq->mapping,
subreq->transferred, subreq->len);
@@ -201,16 +192,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
}
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- rdata->pid = pid;
-
- rc = adjust_credits(rdata->server, &rdata->credits, rdata->subreq.len);
- if (!rc) {
- if (rdata->req->cfile->invalidHandle)
- rc = -EAGAIN;
- else
- rc = rdata->server->ops->async_readv(rdata);
- }
+ rc = rdata->server->ops->async_readv(rdata);
out:
if (rc)
netfs_subreq_terminated(subreq, rc, false);
@@ -245,11 +228,15 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
rreq->rsize = cifs_sb->ctx->rsize;
rreq->wsize = cifs_sb->ctx->wsize;
+ req->pid = current->tgid; // Ummm... This may be a workqueue
if (file) {
open_file = file->private_data;
rreq->netfs_priv = file->private_data;
req->cfile = cifsFileInfo_get(open_file);
+ req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+ req->pid = req->cfile->pid;
} else if (rreq->origin != NETFS_WRITEBACK) {
WARN_ON_ONCE(1);
return -EIO;
@@ -259,35 +246,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
}
/*
- * Expand the size of a readahead to the size of the rsize, if at least as
- * large as a page, allowing for the possibility that rsize is not pow-2
- * aligned.
- */
-static void cifs_expand_readahead(struct netfs_io_request *rreq)
-{
- unsigned int rsize = rreq->rsize;
- loff_t misalignment, i_size = i_size_read(rreq->inode);
-
- if (rsize < PAGE_SIZE)
- return;
-
- if (rsize < INT_MAX)
- rsize = roundup_pow_of_two(rsize);
- else
- rsize = ((unsigned int)INT_MAX + 1) / 2;
-
- misalignment = rreq->start & (rsize - 1);
- if (misalignment) {
- rreq->start -= misalignment;
- rreq->len += misalignment;
- }
-
- rreq->len = round_up(rreq->len, rsize);
- if (rreq->start < i_size && rreq->len > i_size - rreq->start)
- rreq->len = i_size - rreq->start;
-}
-
-/*
* Completion of a request operation.
*/
static void cifs_rreq_done(struct netfs_io_request *rreq)
@@ -342,7 +300,6 @@ const struct netfs_request_ops cifs_req_ops = {
.init_request = cifs_init_request,
.free_request = cifs_free_request,
.free_subrequest = cifs_free_subrequest,
- .expand_readahead = cifs_expand_readahead,
.clamp_length = cifs_clamp_length,
.issue_read = cifs_req_issue_read,
.done = cifs_rreq_done,
@@ -3200,8 +3157,6 @@ static int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- WARN_ON_ONCE(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = netfs_unbuffered_read_iter_locked(iocb, iter);
else
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 3bbac925d076..bc926ab2555b 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -128,12 +128,14 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_flag("compress", Opt_compress),
fsparam_flag("witness", Opt_witness),
+ /* Mount options which take uid or gid */
+ fsparam_uid("backupuid", Opt_backupuid),
+ fsparam_gid("backupgid", Opt_backupgid),
+ fsparam_uid("uid", Opt_uid),
+ fsparam_uid("cruid", Opt_cruid),
+ fsparam_gid("gid", Opt_gid),
+
/* Mount options which take numeric value */
- fsparam_u32("backupuid", Opt_backupuid),
- fsparam_u32("backupgid", Opt_backupgid),
- fsparam_u32("uid", Opt_uid),
- fsparam_u32("cruid", Opt_cruid),
- fsparam_u32("gid", Opt_gid),
fsparam_u32("file_mode", Opt_file_mode),
fsparam_u32("dirmode", Opt_dirmode),
fsparam_u32("dir_mode", Opt_dirmode),
@@ -951,8 +953,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
int i, opt;
bool is_smb3 = !strcmp(fc->fs_type->name, "smb3");
bool skip_parsing = false;
- kuid_t uid;
- kgid_t gid;
cifs_dbg(FYI, "CIFS: parsing cifs mount option '%s'\n", param->key);
@@ -1083,38 +1083,23 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- goto cifs_parse_mount_err;
- ctx->linux_uid = uid;
+ ctx->linux_uid = result.uid;
ctx->uid_specified = true;
break;
case Opt_cruid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- goto cifs_parse_mount_err;
- ctx->cred_uid = uid;
+ ctx->cred_uid = result.uid;
ctx->cruid_specified = true;
break;
case Opt_backupuid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- goto cifs_parse_mount_err;
- ctx->backupuid = uid;
+ ctx->backupuid = result.uid;
ctx->backupuid_specified = true;
break;
case Opt_backupgid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- goto cifs_parse_mount_err;
- ctx->backupgid = gid;
+ ctx->backupgid = result.gid;
ctx->backupgid_specified = true;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- goto cifs_parse_mount_err;
- ctx->linux_gid = gid;
+ ctx->linux_gid = result.gid;
ctx->gid_specified = true;
break;
case Opt_port:
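The same conversion recurs for tracefs and vboxsf further down. With fsparam_uid()/fsparam_gid(), fs_parse() performs the make_kuid()/make_kgid() translation into the caller's user namespace and rejects invalid IDs itself, so each parser shrinks to a plain assignment from result.uid/result.gid. A minimal sketch with hypothetical names:

    #include <linux/fs_context.h>
    #include <linux/fs_parser.h>

    struct example_ctx {
            kuid_t uid;
            kgid_t gid;
    };

    enum { Opt_uid, Opt_gid };

    static const struct fs_parameter_spec example_param_specs[] = {
            fsparam_uid("uid", Opt_uid),
            fsparam_gid("gid", Opt_gid),
            {}
    };

    static int example_parse_param(struct fs_context *fc,
                                   struct fs_parameter *param)
    {
            struct example_ctx *ctx = fc->fs_private;
            struct fs_parse_result result;
            int opt = fs_parse(fc, example_param_specs, param, &result);

            if (opt < 0)
                    return opt;
            switch (opt) {
            case Opt_uid:
                    ctx->uid = result.uid;  /* already a validated kuid_t */
                    break;
            case Opt_gid:
                    ctx->gid = result.gid;
                    break;
            }
            return 0;
    }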
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 38a06e8a0f90..2ae2dbb6202b 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4484,6 +4484,16 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
return rc;
}
+static void smb2_readv_worker(struct work_struct *work)
+{
+ struct cifs_io_subrequest *rdata =
+ container_of(work, struct cifs_io_subrequest, subreq.work);
+
+ netfs_subreq_terminated(&rdata->subreq,
+ (rdata->result == 0 || rdata->result == -EAGAIN) ?
+ rdata->got_bytes : rdata->result, true);
+}
+
static void
smb2_readv_callback(struct mid_q_entry *mid)
{
@@ -4578,9 +4588,8 @@ smb2_readv_callback(struct mid_q_entry *mid)
rdata->result = 0;
}
rdata->credits.value = 0;
- netfs_subreq_terminated(&rdata->subreq,
- (rdata->result == 0 || rdata->result == -EAGAIN) ?
- rdata->got_bytes : rdata->result, true);
+ INIT_WORK(&rdata->subreq.work, smb2_readv_worker);
+ queue_work(cifsiod_wq, &rdata->subreq.work);
release_mid(mid);
add_credits(server, &credits, 0);
}
@@ -4612,7 +4621,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
io_parms.length = rdata->subreq.len;
io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid;
io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid;
- io_parms.pid = rdata->pid;
+ io_parms.pid = rdata->req->pid;
rc = smb2_new_read_req(
(void **) &buf, &total_len, &io_parms, rdata, 0, 0);
@@ -4864,7 +4873,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
.length = wdata->subreq.len,
.persistent_fid = wdata->req->cfile->fid.persistent_fid,
.volatile_fid = wdata->req->cfile->fid.volatile_fid,
- .pid = wdata->pid,
+ .pid = wdata->req->pid,
};
io_parms = &_io_parms;
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 8d10be1fe18a..c3ee42188d25 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -917,6 +917,40 @@ struct smb2_query_directory_rsp {
__u8 Buffer[];
} __packed;
+/* DeviceType Flags */
+#define FILE_DEVICE_CD_ROM 0x00000002
+#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003
+#define FILE_DEVICE_DFS 0x00000006
+#define FILE_DEVICE_DISK 0x00000007
+#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008
+#define FILE_DEVICE_FILE_SYSTEM 0x00000009
+#define FILE_DEVICE_NAMED_PIPE 0x00000011
+#define FILE_DEVICE_NETWORK 0x00000012
+#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014
+#define FILE_DEVICE_NULL 0x00000015
+#define FILE_DEVICE_PARALLEL_PORT 0x00000016
+#define FILE_DEVICE_PRINTER 0x00000018
+#define FILE_DEVICE_SERIAL_PORT 0x0000001b
+#define FILE_DEVICE_STREAMS 0x0000001e
+#define FILE_DEVICE_TAPE 0x0000001f
+#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020
+#define FILE_DEVICE_VIRTUAL_DISK 0x00000024
+#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028
+
+/* Device Characteristics */
+#define FILE_REMOVABLE_MEDIA 0x00000001
+#define FILE_READ_ONLY_DEVICE 0x00000002
+#define FILE_FLOPPY_DISKETTE 0x00000004
+#define FILE_WRITE_ONCE_MEDIA 0x00000008
+#define FILE_REMOTE_DEVICE 0x00000010
+#define FILE_DEVICE_IS_MOUNTED 0x00000020
+#define FILE_VIRTUAL_VOLUME 0x00000040
+#define FILE_DEVICE_SECURE_OPEN 0x00000100
+#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000
+#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000
+#define FILE_PORTABLE_DEVICE 0x00004000
+#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000
+
/*
* Maximum number of iovs we need for a set-info request.
* The largest one is rename/hardlink
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index e7e07891781b..840c71c66b30 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2051,15 +2051,22 @@ out_err1:
* @access: file access flags
* @disposition: file disposition flags
* @may_flags: set with MAY_ flags
+ * @is_dir: true if creating open flags for a directory
*
* Return: file open flags
*/
static int smb2_create_open_flags(bool file_present, __le32 access,
__le32 disposition,
- int *may_flags)
+ int *may_flags,
+ bool is_dir)
{
int oflags = O_NONBLOCK | O_LARGEFILE;
+ if (is_dir) {
+ access &= ~FILE_WRITE_DESIRE_ACCESS_LE;
+ ksmbd_debug(SMB, "Discard write access to a directory\n");
+ }
+
if (access & FILE_READ_DESIRED_ACCESS_LE &&
access & FILE_WRITE_DESIRE_ACCESS_LE) {
oflags |= O_RDWR;
@@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work)
open_flags = smb2_create_open_flags(file_present, daccess,
req->CreateDisposition,
- &may_flags);
+ &may_flags,
+ req->CreateOptions & FILE_DIRECTORY_FILE_LE ||
+ (file_present && S_ISDIR(d_inode(path.dentry)->i_mode)));
if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
if (open_flags & (O_CREAT | O_TRUNC)) {
@@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info = (struct filesystem_device_info *)rsp->Buffer;
- info->DeviceType = cpu_to_le32(stfs.f_type);
- info->DeviceCharacteristics = cpu_to_le32(0x00000020);
+ info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK);
+ info->DeviceCharacteristics =
+ cpu_to_le32(FILE_DEVICE_IS_MOUNTED);
+ if (!test_tree_conn_flag(work->tcon,
+ KSMBD_TREE_CONN_FLAG_WRITABLE))
+ info->DeviceCharacteristics |=
+ cpu_to_le32(FILE_READ_ONLY_DEVICE);
rsp->OutputBufferLength = cpu_to_le32(8);
break;
}
diff --git a/fs/stat.c b/fs/stat.c
index 70bd3e888cfa..89ce1be56310 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -90,6 +90,37 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat)
EXPORT_SYMBOL(generic_fill_statx_attr);
/**
+ * generic_fill_statx_atomic_writes - Fill in atomic writes statx attributes
+ * @stat: Where to fill in the attribute flags
+ * @unit_min: Minimum supported atomic write length in bytes
+ * @unit_max: Maximum supported atomic write length in bytes
+ *
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
+ * atomic write unit_min and unit_max values.
+ */
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max)
+{
+ /* Confirm that the request type is known */
+ stat->result_mask |= STATX_WRITE_ATOMIC;
+
+ /* Confirm that the file attribute type is known */
+ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC;
+
+ if (unit_min) {
+ stat->atomic_write_unit_min = unit_min;
+ stat->atomic_write_unit_max = unit_max;
+ /* Initially only allow 1x segment */
+ stat->atomic_write_segments_max = 1;
+
+ /* Confirm atomic writes are actually supported */
+ stat->attributes |= STATX_ATTR_WRITE_ATOMIC;
+ }
+}
+EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes);
+
+/**
* vfs_getattr_nosec - getattr without security checks
* @path: file to get attributes from
* @stat: structure to return attributes in
@@ -214,6 +245,43 @@ int getname_statx_lookup_flags(int flags)
return lookup_flags;
}
+static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
+ u32 request_mask)
+{
+ int error = vfs_getattr(path, stat, request_mask, flags);
+
+ if (request_mask & STATX_MNT_ID_UNIQUE) {
+ stat->mnt_id = real_mount(path->mnt)->mnt_id_unique;
+ stat->result_mask |= STATX_MNT_ID_UNIQUE;
+ } else {
+ stat->mnt_id = real_mount(path->mnt)->mnt_id;
+ stat->result_mask |= STATX_MNT_ID;
+ }
+
+ if (path_mounted(path))
+ stat->attributes |= STATX_ATTR_MOUNT_ROOT;
+ stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
+
+ /*
+ * If this is a block device inode, override the filesystem
+ * attributes with the block device specific parameters that need to be
+ * obtained from the bdev backing inode.
+ */
+ if (S_ISBLK(stat->mode))
+ bdev_statx(path, stat, request_mask);
+
+ return error;
+}
+
+static int vfs_statx_fd(int fd, int flags, struct kstat *stat,
+ u32 request_mask)
+{
+ CLASS(fd_raw, f)(fd);
+ if (!f.file)
+ return -EBADF;
+ return vfs_statx_path(&f.file->f_path, flags, stat, request_mask);
+}
+
/**
* vfs_statx - Get basic and extra attributes by filename
* @dfd: A file descriptor representing the base dir for a relative filename
@@ -243,36 +311,13 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
retry:
error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
if (error)
- goto out;
-
- error = vfs_getattr(&path, stat, request_mask, flags);
-
- if (request_mask & STATX_MNT_ID_UNIQUE) {
- stat->mnt_id = real_mount(path.mnt)->mnt_id_unique;
- stat->result_mask |= STATX_MNT_ID_UNIQUE;
- } else {
- stat->mnt_id = real_mount(path.mnt)->mnt_id;
- stat->result_mask |= STATX_MNT_ID;
- }
-
- if (path.mnt->mnt_root == path.dentry)
- stat->attributes |= STATX_ATTR_MOUNT_ROOT;
- stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
-
- /* Handle STATX_DIOALIGN for block devices. */
- if (request_mask & STATX_DIOALIGN) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- if (S_ISBLK(inode->i_mode))
- bdev_statx_dioalign(inode, stat);
- }
-
+ return error;
+ error = vfs_statx_path(&path, flags, stat, request_mask);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
-out:
return error;
}
@@ -289,18 +334,10 @@ int vfs_fstatat(int dfd, const char __user *filename,
* If AT_EMPTY_PATH is set, we expect the common case to be that
* empty path, and avoid doing all the extra pathname work.
*/
- if (dfd >= 0 && flags == AT_EMPTY_PATH) {
- char c;
-
- ret = get_user(c, filename);
- if (unlikely(ret))
- return ret;
+ if (flags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename))
+ return vfs_fstat(dfd, stat);
- if (likely(!c))
- return vfs_fstat(dfd, stat);
- }
-
- name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
+ name = getname_flags(filename, getname_statx_lookup_flags(statx_flags));
ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
putname(name);
@@ -488,34 +525,39 @@ static int do_readlinkat(int dfd, const char __user *pathname,
char __user *buf, int bufsiz)
{
struct path path;
+ struct filename *name;
int error;
- int empty = 0;
unsigned int lookup_flags = LOOKUP_EMPTY;
if (bufsiz <= 0)
return -EINVAL;
retry:
- error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);
- if (!error) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- error = empty ? -ENOENT : -EINVAL;
- /*
- * AFS mountpoints allow readlink(2) but are not symlinks
- */
- if (d_is_symlink(path.dentry) || inode->i_op->readlink) {
- error = security_inode_readlink(path.dentry);
- if (!error) {
- touch_atime(&path);
- error = vfs_readlink(path.dentry, buf, bufsiz);
- }
- }
- path_put(&path);
- if (retry_estale(error, lookup_flags)) {
- lookup_flags |= LOOKUP_REVAL;
- goto retry;
+ name = getname_flags(pathname, lookup_flags);
+ error = filename_lookup(dfd, name, lookup_flags, &path, NULL);
+ if (unlikely(error)) {
+ putname(name);
+ return error;
+ }
+
+ /*
+ * AFS mountpoints allow readlink(2) but are not symlinks
+ */
+ if (d_is_symlink(path.dentry) ||
+ d_backing_inode(path.dentry)->i_op->readlink) {
+ error = security_inode_readlink(path.dentry);
+ if (!error) {
+ touch_atime(&path);
+ error = vfs_readlink(path.dentry, buf, bufsiz);
}
+ } else {
+ error = (name->name[0] == '\0') ? -ENOENT : -EINVAL;
+ }
+ path_put(&path);
+ putname(name);
+ if (retry_estale(error, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
}
return error;
}
@@ -659,6 +701,9 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align;
tmp.stx_subvol = stat->subvol;
+ tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
+ tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
+ tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
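Together with generic_fill_statx_atomic_writes() above, these fields let userspace discover the supported atomic write geometry before issuing RWF_ATOMIC writes. A hedged sketch, assuming uapi headers that define STATX_WRITE_ATOMIC:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(int argc, char **argv)
    {
            struct statx stx;

            if (argc < 2 ||
                statx(AT_FDCWD, argv[1], 0, STATX_WRITE_ATOMIC, &stx))
                    return 1;
            if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC)
                    printf("atomic units: min=%u max=%u segments=%u\n",
                           stx.stx_atomic_write_unit_min,
                           stx.stx_atomic_write_unit_max,
                           stx.stx_atomic_write_segments_max);
            return 0;
    }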
@@ -674,7 +719,8 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
return -EINVAL;
- /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
+ /*
+ * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
* from userland.
*/
mask &= ~STATX_CHANGE_COOKIE;
@@ -686,16 +732,41 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
return cp_statx(&stat, buffer);
}
+int do_statx_fd(int fd, unsigned int flags, unsigned int mask,
+ struct statx __user *buffer)
+{
+ struct kstat stat;
+ int error;
+
+ if (mask & STATX__RESERVED)
+ return -EINVAL;
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+
+ /*
+ * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
+ * from userland.
+ */
+ mask &= ~STATX_CHANGE_COOKIE;
+
+ error = vfs_statx_fd(fd, flags, &stat, mask);
+ if (error)
+ return error;
+
+ return cp_statx(&stat, buffer);
+}
+
/**
* sys_statx - System call to get enhanced stats
* @dfd: Base directory to pathwalk from *or* fd to stat.
- * @filename: File to stat or "" with AT_EMPTY_PATH
+ * @filename: File to stat or either NULL or "" with AT_EMPTY_PATH
* @flags: AT_* flags to control pathwalk.
* @mask: Parts of statx struct actually required.
* @buffer: Result buffer.
*
* Note that fstat() can be emulated by setting dfd to the fd of interest,
- * supplying "" as the filename and setting AT_EMPTY_PATH in the flags.
+ * supplying "" (or preferably NULL) as the filename and setting AT_EMPTY_PATH
+ * in the flags.
*/
SYSCALL_DEFINE5(statx,
int, dfd, const char __user *, filename, unsigned, flags,
@@ -703,9 +774,24 @@ SYSCALL_DEFINE5(statx,
struct statx __user *, buffer)
{
int ret;
+ unsigned lflags;
struct filename *name;
- name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL);
+ /*
+ * Short-circuit handling of NULL and "" paths.
+ *
+ * For a NULL path we require and accept only the AT_EMPTY_PATH flag
+ * (possibly |'d with AT_STATX flags).
+ *
+ * However, glibc on 32-bit architectures implements fstatat as statx
+ * with the "" pathname and AT_NO_AUTOMOUNT | AT_EMPTY_PATH flags.
+ * Supporting this results in the uglification below.
+ */
+ lflags = flags & ~(AT_NO_AUTOMOUNT | AT_STATX_SYNC_TYPE);
+ if (lflags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename))
+ return do_statx_fd(dfd, flags & ~AT_NO_AUTOMOUNT, mask, buffer);
+
+ name = getname_flags(filename, getname_statx_lookup_flags(flags));
ret = do_statx(dfd, name, flags, mask, buffer);
putname(name);
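The practical effect of the short-circuit is that an fstat-style statx no longer pays for pathname copying or a path walk. A hedged sketch of the call shape it fast-paths:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/stat.h>

    /* fstat(2) equivalent on an already-open fd: with a NULL path and
     * AT_EMPTY_PATH, the kernel now takes the do_statx_fd() fast path. */
    static int fstat_via_statx(int fd, struct statx *stx)
    {
            return statx(fd, NULL, AT_EMPTY_PATH, STATX_BASIC_STATS, stx);
    }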
diff --git a/fs/super.c b/fs/super.c
index b72f1d288e95..095ba793e10c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1502,8 +1502,17 @@ static int fs_bdev_thaw(struct block_device *bdev)
lockdep_assert_held(&bdev->bd_fsfreeze_mutex);
+ /*
+ * The block device may have been frozen before it was claimed by a
+ * filesystem. Concurrently another process might try to mount that
+ * frozen block device and has temporarily claimed the block device for
+ * that purpose causing a concurrent fs_bdev_thaw() to end up here. The
+ * mounter is already about to abort mounting because they still saw an
+ * elevated bdev->bd_fsfreeze_count, so get_bdev_super() will return
+ * NULL in that case.
+ */
sb = get_bdev_super(bdev);
- if (WARN_ON_ONCE(!sb))
+ if (!sb)
return -EINVAL;
if (sb->s_op->thaw_super)
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3365a30dc1e0..5c0d07ddbda2 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -591,4 +591,5 @@ static void __exit exit_sysv_fs(void)
module_init(init_sysv_fs)
module_exit(exit_sysv_fs)
+MODULE_DESCRIPTION("SystemV Filesystem");
MODULE_LICENSE("GPL");
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 7c29f4afc23d..1028ab6d9a74 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -296,9 +296,9 @@ enum {
};
static const struct fs_parameter_spec tracefs_param_specs[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -306,8 +306,6 @@ static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param
{
struct tracefs_fs_info *opts = fc->s_fs_info;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, tracefs_param_specs, param, &result);
@@ -316,16 +314,10 @@ static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param
switch (opt) {
case Opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return invalf(fc, "Unknown uid");
- opts->uid = uid;
+ opts->uid = result.uid;
break;
case Opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return invalf(fc, "Unknown gid");
- opts->gid = gid;
+ opts->gid = result.gid;
break;
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 27c85d92d1dc..61f25d3cf3f7 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -188,7 +188,6 @@ Eend:
"offset=%lu",
dir->i_ino, (page->index<<PAGE_SHIFT)+offs);
fail:
- SetPageError(page);
return false;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index eee7320ab0b0..17e409ceaa33 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
goto out;
features = uffdio_api.features;
ret = -EINVAL;
- if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+ if (uffdio_api.api != UFFD_API)
goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
#endif
+
+ ret = -EINVAL;
+ if (features & ~uffdio_api.features)
+ goto err_out;
+
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
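Behavioural note: after this change, requesting a feature bit the kernel cannot honour makes UFFDIO_API fail with EINVAL (after the config-dependent masking above) rather than silently clearing it, so callers should request exactly what they need and treat EINVAL as "unsupported". A hedged sketch:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    static int uffd_open(__u64 wanted_features)
    {
            struct uffdio_api api = {
                    .api = UFFD_API,
                    .features = wanted_features,
            };
            int fd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);

            if (fd < 0)
                    return -1;
            /* now fails with EINVAL if any wanted bit is unsupported */
            if (ioctl(fd, UFFDIO_API, &api)) {
                    close(fd);
                    return -1;
            }
            return fd;
    }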
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 118dedef8ebe..fdb4da24d662 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -228,26 +228,19 @@ const struct inode_operations vboxsf_reg_iops = {
static int vboxsf_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
struct vboxsf_handle *sf_handle = file->private_data;
- loff_t off = page_offset(page);
+ loff_t off = folio_pos(folio);
u32 nread = PAGE_SIZE;
u8 *buf;
int err;
- buf = kmap(page);
+ buf = kmap_local_folio(folio, 0);
err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
- if (err == 0) {
- memset(&buf[nread], 0, PAGE_SIZE - nread);
- flush_dcache_page(page);
- SetPageUptodate(page);
- } else {
- SetPageError(page);
- }
+ buf = folio_zero_tail(folio, nread, buf + nread);
- kunmap(page);
- unlock_page(page);
+ kunmap_local(buf);
+ folio_end_read(folio, err == 0);
return err;
}
@@ -295,7 +288,6 @@ static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
kref_put(&sf_handle->refcount, vboxsf_handle_release);
if (err == 0) {
- ClearPageError(page);
/* mtime changed */
sf_i->force_restat = 1;
} else {
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index ffb1d565da39..e95b8a48d8a0 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -41,8 +41,8 @@ enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode,
static const struct fs_parameter_spec vboxsf_fs_parameters[] = {
fsparam_string ("nls", opt_nls),
- fsparam_u32 ("uid", opt_uid),
- fsparam_u32 ("gid", opt_gid),
+ fsparam_uid ("uid", opt_uid),
+ fsparam_gid ("gid", opt_gid),
fsparam_u32 ("ttl", opt_ttl),
fsparam_u32oct ("dmode", opt_dmode),
fsparam_u32oct ("fmode", opt_fmode),
@@ -55,8 +55,6 @@ static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct vboxsf_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
int opt;
opt = fs_parse(fc, vboxsf_fs_parameters, param, &result);
@@ -73,16 +71,10 @@ static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
param->string = NULL;
break;
case opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return -EINVAL;
- ctx->o.uid = uid;
+ ctx->o.uid = result.uid;
break;
case opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return -EINVAL;
- ctx->o.gid = gid;
+ ctx->o.gid = result.gid;
break;
case opt_ttl:
ctx->o.ttl = msecs_to_jiffies(result.uint_32);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c101cf266bc4..6af6f744fdd6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4058,20 +4058,32 @@ xfs_bmapi_reserve_delalloc(
xfs_extlen_t indlen;
uint64_t fdblocks;
int error;
- xfs_fileoff_t aoff = off;
+ xfs_fileoff_t aoff;
+ bool use_cowextszhint =
+ whichfork == XFS_COW_FORK && !prealloc;
+retry:
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
+ aoff = off;
alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
if (prealloc && alen >= len)
prealloc = alen - len;
- /* Figure out the extent size, adjust alen */
- if (whichfork == XFS_COW_FORK) {
+ /*
+ * If we're targeting the COW fork but aren't creating a speculative
+ * posteof preallocation, try to expand the reservation to align with
+ * the COW extent size hint if there's sufficient free space.
+ *
+ * Unlike the data fork, the CoW cancellation functions will free all
+ * the reservations at inactivation, so we don't require that every
+ * delalloc reservation have a dirty pagecache.
+ */
+ if (use_cowextszhint) {
struct xfs_bmbt_irec prev;
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
@@ -4090,7 +4102,7 @@ xfs_bmapi_reserve_delalloc(
*/
error = xfs_quota_reserve_blkres(ip, alen);
if (error)
- return error;
+ goto out;
/*
* Split changing sb for alen and indlen since they could be coming
@@ -4140,6 +4152,17 @@ out_unreserve_frextents:
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
+out:
+ if (error == -ENOSPC || error == -EDQUOT) {
+ trace_xfs_delalloc_enospc(ip, off, len);
+
+ if (prealloc || use_cowextszhint) {
+ /* retry without any preallocation */
+ use_cowextszhint = false;
+ prealloc = 0;
+ goto retry;
+ }
+ }
return error;
}
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 97996cb79aaa..454b63ef7201 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -996,7 +996,7 @@ struct xfs_getparents_by_handle {
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
-#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range)
+#define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index e7a7bfbe75b4..513b50da6215 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -379,10 +379,13 @@ xfs_dinode_verify_fork(
/*
* A directory small enough to fit in the inode must be stored
* in local format. The directory sf <-> extents conversion
- * code updates the directory size accordingly.
+ * code updates the directory size accordingly. Directories
+ * being truncated have zero size and are not subject to this
+ * check.
*/
if (S_ISDIR(mode)) {
- if (be64_to_cpu(dip->di_size) <= fork_size &&
+ if (dip->di_size &&
+ be64_to_cpu(dip->di_size) <= fork_size &&
fork_format != XFS_DINODE_FMT_LOCAL)
return __this_address;
}
@@ -528,9 +531,19 @@ xfs_dinode_verify(
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
return __this_address;
- /* No zero-length symlinks/dirs. */
- if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
- return __this_address;
+ /*
+ * No zero-length symlinks/dirs unless they're unlinked and hence being
+ * inactivated.
+ */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
+ if (dip->di_version > 1) {
+ if (dip->di_nlink)
+ return __this_address;
+ } else {
+ if (dip->di_onlink)
+ return __this_address;
+ }
+ }
fa = xfs_dinode_verify_nrext64(mp, dip);
if (fa)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index ac2e77ebb54c..a4d9fbc21b83 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -486,13 +486,11 @@ out_unlock:
/*
* Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
*/
bool
xfs_can_free_eofblocks(
- struct xfs_inode *ip,
- bool force)
+ struct xfs_inode *ip)
{
struct xfs_bmbt_irec imap;
struct xfs_mount *mp = ip->i_mount;
@@ -526,11 +524,11 @@ xfs_can_free_eofblocks(
return false;
/*
- * Do not free real preallocated or append-only files unless the file
- * has delalloc blocks and we are forced to remove them.
+ * Only free real extents for inodes with persistent preallocations or
+ * the append-only flag.
*/
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
- if (!force || ip->i_delayed_blks == 0)
+ if (ip->i_delayed_blks == 0)
return false;
/*
@@ -584,6 +582,22 @@ xfs_free_eofblocks(
/* Wait on dio to ensure i_size has settled. */
inode_dio_wait(VFS_I(ip));
+ /*
+ * For preallocated files, only free delayed allocations.
+ *
+ * Note that this means we also leave speculative preallocations in
+ * place for preallocated files.
+ */
+ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+ if (ip->i_delayed_blks) {
+ xfs_bmap_punch_delalloc_range(ip,
+ round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+ LLONG_MAX);
+ }
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
ASSERT(xfs_is_shutdown(mp));
@@ -891,7 +905,7 @@ xfs_prepare_shift(
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
* into the accessible region of the file.
*/
- if (xfs_can_free_eofblocks(ip, true)) {
+ if (xfs_can_free_eofblocks(ip)) {
error = xfs_free_eofblocks(ip);
if (error)
return error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 51f84d8ff372..eb0895bfb9da 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
/* EOF block manipulation functions */
-bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+bool xfs_can_free_eofblocks(struct xfs_inode *ip);
int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0953163a2d84..cf629302d48e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -86,9 +86,8 @@ xfs_inode_alloc(
return NULL;
}
- /* VFS doesn't initialise i_mode or i_state! */
+ /* VFS doesn't initialise i_mode! */
VFS_I(ip)->i_mode = 0;
- VFS_I(ip)->i_state = 0;
mapping_set_large_folios(VFS_I(ip)->i_mapping);
XFS_STATS_INC(mp, vn_active);
@@ -314,6 +313,7 @@ xfs_reinit_inode(
dev_t dev = inode->i_rdev;
kuid_t uid = inode->i_uid;
kgid_t gid = inode->i_gid;
+ unsigned long state = inode->i_state;
error = inode_init_always(mp->m_super, inode);
@@ -324,6 +324,7 @@ xfs_reinit_inode(
inode->i_rdev = dev;
inode->i_uid = uid;
inode->i_gid = gid;
+ inode->i_state = state;
mapping_set_large_folios(inode->i_mapping);
return error;
}
@@ -1155,7 +1156,7 @@ xfs_inode_free_eofblocks(
}
*lockflags |= XFS_IOLOCK_EXCL;
- if (xfs_can_free_eofblocks(ip, false))
+ if (xfs_can_free_eofblocks(ip))
return xfs_free_eofblocks(ip);
/* inode could be preallocated or append-only */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 58fb7a5062e1..a4e3cd8971fc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,6 +42,7 @@
#include "xfs_pnfs.h"
#include "xfs_parent.h"
#include "xfs_xattr.h"
+#include "xfs_sb.h"
struct kmem_cache *xfs_inode_cache;
@@ -870,9 +871,16 @@ xfs_init_new_inode(
* this saves us from needing to run a separate transaction to set the
* fork offset in the immediate future.
*/
- if (init_xattrs && xfs_has_attr(mp)) {
+ if (init_xattrs) {
ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
+
+ if (!xfs_has_attr(mp)) {
+ spin_lock(&mp->m_sb_lock);
+ xfs_add_attr(mp);
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+ }
}
/*
@@ -1595,7 +1603,7 @@ xfs_release(
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
return 0;
- if (xfs_can_free_eofblocks(ip, false)) {
+ if (xfs_can_free_eofblocks(ip)) {
/*
* Check if the inode is being opened, written and closed
* frequently and we have delayed allocation blocks outstanding
@@ -1856,15 +1864,13 @@ xfs_inode_needs_inactive(
/*
* This file isn't being freed, so check if there are post-eof blocks
- * to free. @force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with broken
- * free space accounting.
+ * to free.
*
* Note: don't bother with iolock here since lockdep complains about
* acquiring it in reclaim context. We have the only reference to the
* inode at this point anyways.
*/
- return xfs_can_free_eofblocks(ip, true);
+ return xfs_can_free_eofblocks(ip);
}
/*
@@ -1947,15 +1953,11 @@ xfs_inactive(
if (VFS_I(ip)->i_nlink != 0) {
/*
- * force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with
- * broken free space accounting.
- *
* Note: don't bother with iolock here since lockdep complains
* about acquiring it in reclaim context. We have the only
* reference to the inode at this point anyways.
*/
- if (xfs_can_free_eofblocks(ip, true))
+ if (xfs_can_free_eofblocks(ip))
error = xfs_free_eofblocks(ip);
goto out;
@@ -2548,11 +2550,26 @@ xfs_ifree_cluster(
* This buffer may not have been correctly initialised as we
* didn't read it from disk. That's not important because we are
* only using it to mark the buffer as stale in the log, and to
- * attach stale cached inodes on it. That means it will never be
- * dispatched for IO. If it is, we want to know about it, and we
- * want it to fail. We can acheive this by adding a write
- * verifier to the buffer.
+ * attach stale cached inodes on it.
+ *
+ * For the inode that triggered the cluster freeing, this
+ * attachment may occur in xfs_inode_item_precommit() after we
+ * have marked this buffer stale. If this buffer was not in
+ * memory before xfs_ifree_cluster() started, it will not be
+ * marked XBF_DONE and this will cause problems later in
+ * xfs_inode_item_precommit() when we trip over a (stale, !done)
+ * buffer attached to the transaction.
+ *
+ * Hence we have to mark the buffer as XBF_DONE here. This is
+ * safe because we are also marking the buffer as XBF_STALE and
+ * XFS_BLI_STALE. That means it will never be dispatched for
+ * IO and it won't be unlocked until the cluster freeing has
+ * been committed to the journal and the buffer unpinned. If it
+ * is written, we want to know about it, and we want it to
+ * fail. We can achieve this by adding a write verifier to the
+ * buffer.
*/
+ bp->b_flags |= XBF_DONE;
bp->b_ops = &xfs_inode_buf_ops;
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 378342673925..414903885ab9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1148,33 +1148,23 @@ xfs_buffered_write_iomap_begin(
}
}
-retry:
- error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
- end_fsb - offset_fsb, prealloc_blocks,
- allocfork == XFS_DATA_FORK ? &imap : &cmap,
- allocfork == XFS_DATA_FORK ? &icur : &ccur,
- allocfork == XFS_DATA_FORK ? eof : cow_eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
- trace_xfs_delalloc_enospc(ip, offset, count);
- if (prealloc_blocks) {
- prealloc_blocks = 0;
- goto retry;
- }
- fallthrough;
- default:
- goto out_unlock;
- }
-
if (allocfork == XFS_COW_FORK) {
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &cmap,
+ &ccur, cow_eof);
+ if (error)
+ goto out_unlock;
+
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
goto found_cow;
}
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &imap, &icur,
+ eof);
+ if (error)
+ goto out_unlock;
+
/*
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff222827e550..a00dcbc77e12 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,6 +17,8 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
@@ -811,6 +813,7 @@ xfs_setattr_size(
struct xfs_trans *tp;
int error;
uint lock_flags = 0;
+ uint resblks = 0;
bool did_zeroing = false;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
@@ -917,7 +920,17 @@ xfs_setattr_size(
return error;
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ /*
+ * For a realtime inode with an rtextsize of more than one block, we need
+ * a block reservation for the bmap btree block allocations/splits that
+ * can happen, since truncation may split the tail written extent and
+ * convert the part beyond EOF to unwritten.
+ */
+ if (xfs_inode_has_bigrtalloc(ip))
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, 0, &tp);
if (error)
return error;
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index b20fa25a7e8d..052e5c98c105 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -46,6 +46,7 @@ mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
+mandatory-y += runtime-const.h
mandatory-y += rwonce.h
mandatory-y += sections.h
mandatory-y += serial.h
diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h
new file mode 100644
index 000000000000..670499459514
--- /dev/null
+++ b/include/asm-generic/runtime-const.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+/*
+ * This is the fallback for when the architecture doesn't
+ * support the runtime const operations.
+ *
+ * We just use the actual symbols as-is.
+ */
+#define runtime_const_ptr(sym) (sym)
+#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym))
+#define runtime_const_init(type,sym) do { } while (0)
+
+#endif
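For context, the intended use (mirroring the fs/dcache.c hash lookups this was introduced for) looks roughly like the sketch below; on architectures with a real implementation the pointer and shift are patched into the instruction stream at boot, while this fallback degrades to ordinary loads. The names are hypothetical, and each patched symbol also needs RUNTIME_CONST(ptr, ...) / RUNTIME_CONST(shift, ...) entries in the architecture linker script via the vmlinux.lds.h helper added below:

    #include <linux/cache.h>
    #include <linux/init.h>
    #include <linux/list_bl.h>
    #include <asm/runtime-const.h>

    static struct hlist_bl_head *example_hashtable __ro_after_init;
    static unsigned int example_hash_shift __ro_after_init;

    static inline struct hlist_bl_head *example_hash(unsigned int hash)
    {
            return runtime_const_ptr(example_hashtable) +
                   runtime_const_shift_right_32(hash, example_hash_shift);
    }

    static void __init example_init_table(void)
    {
            /* ... allocate example_hashtable, compute the shift ... */
            runtime_const_init(ptr, example_hashtable);
            runtime_const_init(shift, example_hash_shift);
    }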
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 933ca6581aba..fabcefe8a80a 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
#ifndef sys_mmap
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
- unsigned long fd, off_t pgoff);
+ unsigned long fd, unsigned long off);
#endif
#ifndef sys_rt_sigreturn
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..389a78415b9b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -944,6 +944,14 @@
#define CON_INITCALL \
BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
+#define RUNTIME_NAME(t,x) runtime_##t##_##x
+
+#define RUNTIME_CONST(t,x) \
+ . = ALIGN(8); \
+ RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \
+ *(RUNTIME_NAME(t,x)); \
+ }
+
/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
#define KUNIT_TABLE() \
. = ALIGN(8); \
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 7428cb43952d..804f856ed3e5 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -7,51 +7,38 @@
struct request;
enum blk_integrity_flags {
- BLK_INTEGRITY_VERIFY = 1 << 0,
- BLK_INTEGRITY_GENERATE = 1 << 1,
+ BLK_INTEGRITY_NOVERIFY = 1 << 0,
+ BLK_INTEGRITY_NOGENERATE = 1 << 1,
BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
- BLK_INTEGRITY_IP_CHECKSUM = 1 << 3,
+ BLK_INTEGRITY_REF_TAG = 1 << 3,
+ BLK_INTEGRITY_STACKED = 1 << 4,
};
-struct blk_integrity_iter {
- void *prot_buf;
- void *data_buf;
- sector_t seed;
- unsigned int data_size;
- unsigned short interval;
- unsigned char tuple_size;
- unsigned char pi_offset;
- const char *disk_name;
-};
-
-typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
-typedef void (integrity_prepare_fn) (struct request *);
-typedef void (integrity_complete_fn) (struct request *, unsigned int);
-
-struct blk_integrity_profile {
- integrity_processing_fn *generate_fn;
- integrity_processing_fn *verify_fn;
- integrity_prepare_fn *prepare_fn;
- integrity_complete_fn *complete_fn;
- const char *name;
-};
+const char *blk_integrity_profile_name(struct blk_integrity *bi);
+bool queue_limits_stack_integrity(struct queue_limits *t,
+ struct queue_limits *b);
+static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
+ struct block_device *bdev)
+{
+ return queue_limits_stack_integrity(t, &bdev->bd_disk->queue->limits);
+}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-void blk_integrity_register(struct gendisk *, struct blk_integrity *);
-void blk_integrity_unregister(struct gendisk *);
-int blk_integrity_compare(struct gendisk *, struct gendisk *);
int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
struct scatterlist *);
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
{
- struct blk_integrity *bi = &disk->queue->integrity;
+ return q->limits.integrity.tuple_size;
+}
- if (!bi->profile)
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+ if (!blk_integrity_queue_supports_integrity(disk->queue))
return NULL;
-
- return bi;
+ return &disk->queue->limits.integrity;
}
static inline struct blk_integrity *
@@ -60,12 +47,6 @@ bdev_get_integrity(struct block_device *bdev)
return blk_get_integrity(bdev->bd_disk);
}
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
- return q->integrity.profile;
-}
-
static inline unsigned short
queue_max_integrity_segments(const struct request_queue *q)
{
@@ -100,14 +81,13 @@ static inline bool blk_integrity_rq(struct request *rq)
}
/*
- * Return the first bvec that contains integrity data. Only drivers that are
- * limited to a single integrity segment should use this helper.
+ * Return the current bvec that contains the integrity data. bip_iter may be
+ * advanced to iterate over the integrity data.
*/
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
- return NULL;
- return rq->bio->bi_integrity->bip_vec;
+ return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+ rq->bio->bi_integrity->bip_iter);
}
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -134,17 +114,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q)
{
return false;
}
-static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
-{
- return 0;
-}
-static inline void blk_integrity_register(struct gendisk *d,
- struct blk_integrity *b)
-{
-}
-static inline void blk_integrity_unregister(struct gendisk *d)
-{
-}
static inline unsigned short
queue_max_integrity_segments(const struct request_queue *q)
{
@@ -167,9 +136,11 @@ static inline int blk_integrity_rq(struct request *rq)
return 0;
}
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- return NULL;
+ /* the optimizer will remove all calls to this function */
+ return (struct bio_vec){ };
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
#endif /* _LINUX_BLK_INTEGRITY_H */
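With the profile gone, PI-capable drivers describe their integrity format declaratively in queue_limits at allocation time instead of calling blk_integrity_register() afterwards. A hedged sketch for a blk-mq driver offering T10 Type 1 protection:

    #include <linux/blk-mq.h>
    #include <linux/blkdev.h>
    #include <linux/t10-pi.h>

    static struct gendisk *example_alloc_disk(struct blk_mq_tag_set *set)
    {
            struct queue_limits lim = {
                    .logical_block_size = 512,
            };

            /* 8-byte PI tuple per sector: CRC16 guard, checked ref tag */
            lim.integrity.csum_type  = BLK_INTEGRITY_CSUM_CRC;
            lim.integrity.tuple_size = sizeof(struct t10_pi_tuple);
            lim.integrity.flags      = BLK_INTEGRITY_REF_TAG;

            return blk_mq_alloc_disk(set, &lim, NULL);
    }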
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 781c4500491b..632edd71f8c6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -162,6 +162,11 @@ typedef u16 blk_short_t;
*/
#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)17)
+/*
+ * Invalid size or alignment.
+ */
+#define BLK_STS_INVAL ((__force blk_status_t)19)
+
/**
* blk_path_error - returns true if error may be path related
* @error: status the request was completed with
@@ -370,7 +375,7 @@ enum req_flag_bits {
__REQ_SWAP, /* swap I/O */
__REQ_DRV, /* for driver use */
__REQ_FS_PRIVATE, /* for file system (submitter) use */
-
+ __REQ_ATOMIC, /* for atomic write operations */
/*
* Command specific flags, keep last:
*/
@@ -402,6 +407,7 @@ enum req_flag_bits {
#define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP)
#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
+#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24c36929920b..b8196e219ac2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -105,9 +105,16 @@ enum {
struct disk_events;
struct badblocks;
+enum blk_integrity_checksum {
+ BLK_INTEGRITY_CSUM_NONE = 0,
+ BLK_INTEGRITY_CSUM_IP = 1,
+ BLK_INTEGRITY_CSUM_CRC = 2,
+ BLK_INTEGRITY_CSUM_CRC64 = 3,
+} __packed;
+
struct blk_integrity {
- const struct blk_integrity_profile *profile;
unsigned char flags;
+ enum blk_integrity_checksum csum_type;
unsigned char tuple_size;
unsigned char pi_offset;
unsigned char interval_exp;
@@ -261,6 +268,7 @@ static inline dev_t disk_devt(struct gendisk *disk)
return MKDEV(disk->major, disk->first_minor);
}
+/* blk_validate_limits() validates bsize, so drivers don't usually need to */
static inline int blk_validate_block_size(unsigned long bsize)
{
if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
@@ -275,17 +283,75 @@ static inline bool blk_op_is_passthrough(blk_opf_t op)
return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
}
+/* flags set by the driver in queue_limits.features */
+typedef unsigned int __bitwise blk_features_t;
+
+/* supports a volatile write cache */
+#define BLK_FEAT_WRITE_CACHE ((__force blk_features_t)(1u << 0))
+
+/* supports passing on the FUA bit */
+#define BLK_FEAT_FUA ((__force blk_features_t)(1u << 1))
+
+/* rotational device (hard drive or floppy) */
+#define BLK_FEAT_ROTATIONAL ((__force blk_features_t)(1u << 2))
+
+/* contributes to the random number pool */
+#define BLK_FEAT_ADD_RANDOM ((__force blk_features_t)(1u << 3))
+
+/* do disk/partitions IO accounting */
+#define BLK_FEAT_IO_STAT ((__force blk_features_t)(1u << 4))
+
+/* don't modify data until writeback is done */
+#define BLK_FEAT_STABLE_WRITES ((__force blk_features_t)(1u << 5))
+
+/* always completes in submit context */
+#define BLK_FEAT_SYNCHRONOUS ((__force blk_features_t)(1u << 6))
+
+/* supports REQ_NOWAIT */
+#define BLK_FEAT_NOWAIT ((__force blk_features_t)(1u << 7))
+
+/* supports DAX */
+#define BLK_FEAT_DAX ((__force blk_features_t)(1u << 8))
+
+/* supports I/O polling */
+#define BLK_FEAT_POLL ((__force blk_features_t)(1u << 9))
+
+/* is a zoned device */
+#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10))
+
+/* supports PCI(e) p2p requests */
+#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12))
+
+/* skip this queue in blk_mq_(un)quiesce_tagset */
+#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13))
+
+/* bounce all highmem pages */
+#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14))
+
+/* undocumented magic for bcache */
+#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
+ ((__force blk_features_t)(1u << 15))
+
/*
- * BLK_BOUNCE_NONE: never bounce (default)
- * BLK_BOUNCE_HIGH: bounce all highmem pages
+ * Flags automatically inherited when stacking limits.
*/
-enum blk_bounce {
- BLK_BOUNCE_NONE,
- BLK_BOUNCE_HIGH,
-};
+#define BLK_FEAT_INHERIT_MASK \
+ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
+ BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
+
+/* internal flags in queue_limits.flags */
+typedef unsigned int __bitwise blk_flags_t;
+
+/* do not send FLUSH/FUA commands despite advertising a write cache */
+#define BLK_FLAG_WRITE_CACHE_DISABLED ((__force blk_flags_t)(1u << 0))
+
+/* I/O topology is misaligned */
+#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
struct queue_limits {
- enum blk_bounce bounce;
+ blk_features_t features;
+ blk_flags_t flags;
unsigned long seg_boundary_mask;
unsigned long virt_boundary_mask;
@@ -310,14 +376,20 @@ struct queue_limits {
unsigned int discard_alignment;
unsigned int zone_write_granularity;
+ /* atomic write limits */
+ unsigned int atomic_write_hw_max;
+ unsigned int atomic_write_max_sectors;
+ unsigned int atomic_write_hw_boundary;
+ unsigned int atomic_write_boundary_sectors;
+ unsigned int atomic_write_hw_unit_min;
+ unsigned int atomic_write_unit_min;
+ unsigned int atomic_write_hw_unit_max;
+ unsigned int atomic_write_unit_max;
+
unsigned short max_segments;
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
- unsigned char misaligned;
- unsigned char discard_misaligned;
- unsigned char raid_partial_stripes_expensive;
- bool zoned;
unsigned int max_open_zones;
unsigned int max_active_zones;
@@ -327,13 +399,14 @@ struct queue_limits {
* due to possible offsets.
*/
unsigned int dma_alignment;
+ unsigned int dma_pad_mask;
+
+ struct blk_integrity integrity;
};
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void *data);
-void disk_set_zoned(struct gendisk *disk);
-
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
@@ -414,10 +487,6 @@ struct request_queue {
struct queue_limits limits;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- struct blk_integrity integrity;
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
#ifdef CONFIG_PM
struct device *dev;
enum rpm_status rpm_status;
@@ -439,8 +508,6 @@ struct request_queue {
*/
int id;
- unsigned int dma_pad_mask;
-
/*
* queue settings
*/
@@ -526,38 +593,20 @@ struct request_queue {
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
-#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */
-#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */
#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
-#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */
#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
-#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */
#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
-#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */
-#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */
-#define QUEUE_FLAG_WC 17 /* Write back caching */
-#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */
-#define QUEUE_FLAG_DAX 19 /* device supports DAX */
#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
-#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */
-#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
-#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
-#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/
-#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \
- (1UL << QUEUE_FLAG_SAME_COMP) | \
- (1UL << QUEUE_FLAG_NOWAIT))
+#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP)
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
@@ -565,16 +614,10 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
-#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
-#define blk_queue_stable_writes(q) \
- test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
-#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
-#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_zone_resetall(q) \
- test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
-#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-#define blk_queue_pci_p2pdma(q) \
- test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
+#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
+#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
+#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
#define blk_queue_rq_alloc_time(q) \
test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
@@ -590,7 +633,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
#define blk_queue_skip_tagset_quiesce(q) \
- test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags)
+ ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -620,16 +663,26 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q)
static inline bool blk_queue_is_zoned(struct request_queue *q)
{
- return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned;
+ return IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (q->limits.features & BLK_FEAT_ZONED);
}
#ifdef CONFIG_BLK_DEV_ZONED
-unsigned int bdev_nr_zones(struct block_device *bdev);
-
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
- return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0;
+ return disk->nr_zones;
+}
+bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
+{
+ return 0;
}
+static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
+{
+ return false;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
{
@@ -638,16 +691,9 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
return sector >> ilog2(disk->queue->limits.chunk_sectors);
}
-static inline void disk_set_max_open_zones(struct gendisk *disk,
- unsigned int max_open_zones)
-{
- disk->queue->limits.max_open_zones = max_open_zones;
-}
-
-static inline void disk_set_max_active_zones(struct gendisk *disk,
- unsigned int max_active_zones)
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
{
- disk->queue->limits.max_active_zones = max_active_zones;
+ return disk_nr_zones(bdev->bd_disk);
}
static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
@@ -660,36 +706,6 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
return bdev->bd_disk->queue->limits.max_active_zones;
}
-bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
-#else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
- return 0;
-}
-
-static inline unsigned int disk_nr_zones(struct gendisk *disk)
-{
- return 0;
-}
-static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
-{
- return 0;
-}
-static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
-{
- return 0;
-}
-
-static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
-{
- return 0;
-}
-static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
-{
- return false;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
static inline unsigned int blk_queue_depth(struct request_queue *q)
{
if (q->queue_depth)
@@ -880,14 +896,15 @@ static inline bool bio_straddles_zones(struct bio *bio)
}
/*
- * Return how much of the chunk is left to be used for I/O at a given offset.
+ * Return how much within the boundary is left to be used for I/O at a given
+ * offset.
*/
-static inline unsigned int blk_chunk_sectors_left(sector_t offset,
- unsigned int chunk_sectors)
+static inline unsigned int blk_boundary_sectors_left(sector_t offset,
+ unsigned int boundary_sectors)
{
- if (unlikely(!is_power_of_2(chunk_sectors)))
- return chunk_sectors - sector_div(offset, chunk_sectors);
- return chunk_sectors - (offset & (chunk_sectors - 1));
+ if (unlikely(!is_power_of_2(boundary_sectors)))
+ return boundary_sectors - sector_div(offset, boundary_sectors);
+ return boundary_sectors - (offset & (boundary_sectors - 1));
}
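A quick worked example covering both branches of the renamed helper:

	/*
	 * boundary_sectors = 256, offset = 1000:  1000 & 255 = 232,
	 *   so 256 - 232 = 24 sectors remain before the boundary.
	 * boundary_sectors = 100 (not a power of two), offset = 1000:
	 *   1000 % 100 = 0, so a full 100 sectors remain.
	 */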
/**
@@ -904,7 +921,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
*/
static inline struct queue_limits
queue_limits_start_update(struct request_queue *q)
- __acquires(q->limits_lock)
{
mutex_lock(&q->limits_lock);
return q->limits;
@@ -927,26 +943,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q)
}
/*
+ * These helpers are for drivers that have sloppy feature negotiation and might
+ * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O
+ * completion handler when the device returned an indicator that the respective
+ * feature is not actually supported. They are racy and the driver needs to
+ * cope with that. Try to avoid this scheme if you can.
+ */
+static inline void blk_queue_disable_discard(struct request_queue *q)
+{
+ q->limits.max_discard_sectors = 0;
+}
+
+static inline void blk_queue_disable_secure_erase(struct request_queue *q)
+{
+ q->limits.max_secure_erase_sectors = 0;
+}
+
+static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
+{
+ q->limits.max_write_zeroes_sectors = 0;
+}
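A hedged sketch of the racy scheme the comment above describes; the driver function name is illustrative, not from the patch:

	static void foo_complete_rq(struct request *rq, blk_status_t status)
	{
		/* device reported discard is not actually supported */
		if (status == BLK_STS_NOTSUPP && req_op(rq) == REQ_OP_DISCARD)
			blk_queue_disable_discard(rq->q);
		blk_mq_end_request(rq, status);
	}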
+
+/*
* Access functions for manipulating queue properties
*/
-extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
-void blk_queue_max_secure_erase_sectors(struct request_queue *q,
- unsigned int max_sectors);
-extern void blk_queue_max_discard_sectors(struct request_queue *q,
- unsigned int max_discard_sectors);
-extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
- unsigned int max_write_same_sectors);
-extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
-extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
- unsigned int max_zone_append_sectors);
-extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
-void blk_queue_zone_write_granularity(struct request_queue *q,
- unsigned int size);
-extern void blk_queue_alignment_offset(struct request_queue *q,
- unsigned int alignment);
-void disk_update_readahead(struct gendisk *disk);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
-extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_stacking_limits(struct queue_limits *lim);
@@ -954,9 +975,7 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
sector_t offset, const char *pfx);
-extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
-extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
struct blk_independent_access_ranges *
disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
@@ -1077,6 +1096,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
+#define BLKDEV_ZERO_KILLABLE (1 << 2) /* interruptible by fatal signals */
extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
@@ -1204,12 +1224,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
- int retval = 512;
-
- if (q && q->limits.logical_block_size)
- retval = q->limits.logical_block_size;
-
- return retval;
+ return q->limits.logical_block_size;
}
static inline unsigned int bdev_logical_block_size(struct block_device *bdev)
@@ -1295,29 +1310,38 @@ static inline bool bdev_nonrot(struct block_device *bdev)
static inline bool bdev_synchronous(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_SYNCHRONOUS,
- &bdev_get_queue(bdev)->queue_flags);
+ return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
}
static inline bool bdev_stable_writes(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_STABLE_WRITES,
- &bdev_get_queue(bdev)->queue_flags);
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE)
+ return true;
+ return q->limits.features & BLK_FEAT_STABLE_WRITES;
+}
+
+static inline bool blk_queue_write_cache(struct request_queue *q)
+{
+ return (q->limits.features & BLK_FEAT_WRITE_CACHE) &&
+ !(q->limits.flags & BLK_FLAG_WRITE_CACHE_DISABLED);
}
static inline bool bdev_write_cache(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
+ return blk_queue_write_cache(bdev_get_queue(bdev));
}
static inline bool bdev_fua(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
+ return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
}
static inline bool bdev_nowait(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags);
+ return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT;
}
static inline bool bdev_is_zoned(struct block_device *bdev)
@@ -1359,7 +1383,31 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
static inline int queue_dma_alignment(const struct request_queue *q)
{
- return q ? q->limits.dma_alignment : 511;
+ return q->limits.dma_alignment;
+}
+
+static inline unsigned int
+queue_atomic_write_unit_max_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_unit_max;
+}
+
+static inline unsigned int
+queue_atomic_write_unit_min_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_unit_min;
+}
+
+static inline unsigned int
+queue_atomic_write_boundary_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_boundary_sectors << SECTOR_SHIFT;
+}
+
+static inline unsigned int
+queue_atomic_write_max_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_max_sectors << SECTOR_SHIFT;
}
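Note the asymmetry visible in these accessors: the unit_min/unit_max limits are already byte-valued and returned directly, while the max and boundary limits are sector-valued and shifted by SECTOR_SHIFT. A small debugging sketch that reports all four in bytes:

	pr_info("atomic unit min/max %u/%u, max bytes %u, boundary %u\n",
		queue_atomic_write_unit_min_bytes(q),
		queue_atomic_write_unit_max_bytes(q),
		queue_atomic_write_max_bytes(q),
		queue_atomic_write_boundary_bytes(q));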
static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
@@ -1374,10 +1422,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev,
bdev_logical_block_size(bdev) - 1);
}
+static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
+{
+ return lim->dma_alignment | lim->dma_pad_mask;
+}
+
static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
unsigned int len)
{
- unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+ unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits);
+
return !(addr & alignment) && !(len & alignment);
}
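Worked example for the combined mask: with dma_alignment = 511 and dma_pad_mask = 7, blk_lim_dma_alignment_and_pad() yields 511 | 7 = 511, so blk_rq_aligned() accepts a buffer only when both the address and the length have their low nine bits clear, i.e. 512-byte alignment.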
@@ -1563,7 +1617,7 @@ int sync_blockdev(struct block_device *bdev);
int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct path *, struct kstat *, u32);
void printk_all_partitions(void);
int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
@@ -1581,7 +1635,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct path *path, struct kstat *stat,
+ u32 request_mask)
{
}
static inline void printk_all_partitions(void)
@@ -1603,6 +1658,27 @@ struct io_comp_batch {
void (*complete)(struct io_comp_batch *);
};
+static inline bool bdev_can_atomic_write(struct block_device *bdev)
+{
+ struct request_queue *bd_queue = bdev->bd_queue;
+ struct queue_limits *limits = &bd_queue->limits;
+
+ if (!limits->atomic_write_unit_min)
+ return false;
+
+ if (bdev_is_partition(bdev)) {
+ sector_t bd_start_sect = bdev->bd_start_sect;
+ unsigned int alignment =
+ max(limits->atomic_write_unit_min,
+ limits->atomic_write_hw_boundary);
+
+ if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT))
+ return false;
+ }
+
+ return true;
+}
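A hedged sketch of how an open path might consume this helper to set the new FMODE_CAN_ATOMIC_WRITE bit; the O_DIRECT check reflects that atomic writes are a direct-I/O feature, and the surrounding code is illustrative:

	if (bdev_can_atomic_write(bdev) && (file->f_flags & O_DIRECT))
		file->f_mode |= FMODE_CAN_ATOMIC_WRITE;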
+
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
#endif /* _LINUX_BLKDEV_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 50aa87f8d77f..e4070fb02b11 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -746,6 +746,8 @@ struct bpf_verifier_env {
/* Same as scratched_regs but for stack slots */
u64 scratched_stack_slots;
u64 prev_log_pos, prev_insn_print_pos;
+ /* buffer used to temporarily hold constants as scalar registers */
+ struct bpf_reg_state fake_reg[2];
/* buffer used to generate temporary string representations,
* e.g., in reg_type_str() to generate reg_type string
*/
diff --git a/include/linux/btf.h b/include/linux/btf.h
index f9e56fd12a9f..7c3e40c3295e 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -82,7 +82,7 @@
* as to avoid issues such as the compiler inlining or eliding either a static
* kfunc, or a global kfunc in an LTO build.
*/
-#define __bpf_kfunc __used noinline
+#define __bpf_kfunc __used __retain noinline
#define __bpf_kfunc_start_defs() \
__diag_push(); \
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index bd1e361b351c..f41c7f0ef91e 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -280,4 +280,18 @@ static inline void *bvec_virt(struct bio_vec *bvec)
return page_address(bvec->bv_page) + bvec->bv_offset;
}
+/**
+ * bvec_phys - return the physical address for a bvec
+ * @bvec: bvec to return the physical address for
+ */
+static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
+{
+ /*
+ * Note this open codes page_to_phys because page_to_phys is defined in
+ * <asm/io.h>, which we don't want to pull in here. If it ever moves to
+ * a sensible place we should start using it.
+ */
+ return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset;
+}
+
#endif /* __LINUX_BVEC_H */
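A minimal usage sketch for the new helper, walking a bio's segments to feed a DMA engine; the descriptor programming is elided:

	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_segment(bv, bio, iter) {
		phys_addr_t paddr = bvec_phys(&bv);
		/* program paddr / bv.bv_len into the next descriptor ... */
	}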
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index c2d09bc4f976..80c4181e194a 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -63,17 +63,20 @@
#define __free(_name) __cleanup(__free_##_name)
-#define __get_and_null_ptr(p) \
- ({ __auto_type __ptr = &(p); \
- __auto_type __val = *__ptr; \
- *__ptr = NULL; __val; })
+#define __get_and_null(p, nullvalue) \
+ ({ \
+ __auto_type __ptr = &(p); \
+ __auto_type __val = *__ptr; \
+ *__ptr = nullvalue; \
+ __val; \
+ })
static inline __must_check
const volatile void * __must_check_fn(const volatile void *val)
{ return val; }
#define no_free_ptr(p) \
- ((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))
+ ((typeof(p)) __must_check_fn(__get_and_null(p, NULL)))
#define return_ptr(p) return no_free_ptr(p)
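For context, a sketch of the ownership pattern these macros implement; setup() is a placeholder, not a real API:

	struct foo *p __free(kfree) = kzalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;
	if (setup(p))		/* error path: p is freed automatically */
		return NULL;
	return_ptr(p);		/* success: ownership moves out, no kfree */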
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 99155df162d0..2af44427107d 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -159,6 +159,7 @@ struct closure {
#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
+#define CLOSURE_MAGIC_STACK 0xc05451cc
unsigned int magic;
struct list_head all;
@@ -285,6 +286,21 @@ static inline void closure_get(struct closure *cl)
}
/**
+ * closure_get_not_zero - take a ref on a closure unless its count is zero
+ */
+static inline bool closure_get_not_zero(struct closure *cl)
+{
+ unsigned old = atomic_read(&cl->remaining);
+ do {
+ if (!(old & CLOSURE_REMAINING_MASK))
+ return false;
+
+ } while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1));
+
+ return true;
+}
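The helper is a try-get: the cmpxchg loop only takes a reference while the remaining count is non-zero, so a closure that has already completed cannot be revived. A sketch of the intended pattern; do_work() is illustrative:

	if (closure_get_not_zero(cl)) {
		/* cl cannot complete under us until we drop the ref */
		do_work(cl);
		closure_put(cl);
	}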
+
+/**
* closure_init - Initialize a closure, setting the refcount to 1
* @cl: closure to initialize
* @parent: parent of the new closure. cl will take a refcount on it for its
@@ -308,6 +324,18 @@ static inline void closure_init_stack(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->magic = CLOSURE_MAGIC_STACK;
+#endif
+}
+
+static inline void closure_init_stack_release(struct closure *cl)
+{
+ memset(cl, 0, sizeof(struct closure));
+ atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->magic = CLOSURE_MAGIC_STACK;
+#endif
}
/**
@@ -355,6 +383,8 @@ do { \
*/
#define closure_return(_cl) continue_at((_cl), NULL, NULL)
+void closure_return_sync(struct closure *cl);
+
/**
* continue_at_nobarrier - jump to another function without barrier
*
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 233f61ec8afc..56cebaff0c91 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -608,7 +608,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd,
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
struct compat_statfs64 __user *buf);
asmlinkage long compat_sys_truncate(const char __user *, compat_off_t);
-asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t);
+asmlinkage long compat_sys_ftruncate(unsigned int, compat_off_t);
/* No generic prototype for truncate64, ftruncate64, fallocate */
asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
int flags, umode_t mode);
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 93600de3800b..f14c275950b5 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# define __preserve_most
#endif
+/*
+ * Annotating a function/variable with __retain tells the compiler to place
+ * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
+ * instructs the linker to retain the object during garbage-cleanup or LTO
+ * phases.
+ *
+ * Note that the __used macro is also used to prevent functions or data
+ * from being optimized out, but it operates at the compiler/IR level and
+ * may still allow unintended removal of objects during linking.
+ *
+ * Optional: only supported since gcc >= 11, clang >= 13
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
+ */
+#if __has_attribute(__retain__) && \
+ (defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
+ defined(CONFIG_LTO_CLANG))
+# define __retain __attribute__((__retain__))
+#else
+# define __retain
+#endif
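A minimal sketch of annotating an object so the linker keeps it through dead-code elimination or LTO, pairing __retain with __used the same way __bpf_kfunc now does:

	static const char example_tag[] __used __retain = "keep-me";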
+
/* Compiler specific macros. */
#ifdef __clang__
#include <linux/compiler-clang.h>
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bf53e3894aae..bff956f7b2b9 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -71,7 +71,7 @@ extern const struct qstr dotdot_name;
# define DNAME_INLINE_LEN 40 /* 192 bytes */
#else
# ifdef CONFIG_SMP
-# define DNAME_INLINE_LEN 40 /* 128 bytes */
+# define DNAME_INLINE_LEN 36 /* 128 bytes */
# else
# define DNAME_INLINE_LEN 44 /* 128 bytes */
# endif
@@ -89,13 +89,18 @@ struct dentry {
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+ /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
/* Ref lookup also touches following */
- struct lockref d_lockref; /* per-dentry lock and refcount */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
unsigned long d_time; /* used by d_revalidate */
void *d_fsdata; /* fs-specific data */
+ /* --- cacheline 2 boundary (128 bytes) --- */
+ struct lockref d_lockref; /* per-dentry lock and refcount
+ * keep separate from RCU lookup area if
+ * possible!
+ */
union {
struct list_head d_lru; /* LRU list */
@@ -278,6 +283,8 @@ static inline unsigned d_count(const struct dentry *dentry)
return dentry->d_lockref.count;
}
+ino_t d_parent_ino(struct dentry *dentry);
+
/*
* helper function for dentry_operations.d_dname() members
*/
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 82b2195efaca..15d28164bbbd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -358,6 +358,13 @@ struct dm_target {
bool discards_supported:1;
/*
+ * Automatically set by dm-core if this target supports
+ * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated
+ * using REQ_OP_ZONE_RESET. Target drivers must not set this manually.
+ */
+ bool zone_reset_all_supported:1;
+
+ /*
* Set if this target requires that discards be split on
* 'max_discard_sectors' boundaries.
*/
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index bb37ad5cc954..893a1d21dc1c 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -158,6 +158,7 @@ struct fid {
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
+#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
/**
* struct export_operations - for nfsd to communicate with file systems
@@ -305,6 +306,7 @@ static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid,
extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt,
struct fid *fid, int fh_len,
int fileid_type,
+ unsigned int flags,
int (*acceptable)(void *, struct dentry *),
void *context);
extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
diff --git a/include/linux/file.h b/include/linux/file.h
index 45d0f4800abd..237931f20739 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -97,6 +97,26 @@ extern void put_unused_fd(unsigned int fd);
DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
get_unused_fd_flags(flags), unsigned flags)
+/*
+ * take_fd() will take care to set @fd to -EBADF ensuring that
+ * CLASS(get_unused_fd) won't call put_unused_fd(). This makes it
+ * easier to rely on CLASS(get_unused_fd):
+ *
+ * struct file *f;
+ *
+ * CLASS(get_unused_fd, fd)(O_CLOEXEC);
+ * if (fd < 0)
+ * return fd;
+ *
+ * f = dentry_open(&path, O_RDONLY, current_cred());
+ * if (IS_ERR(f))
+ * return PTR_ERR(f);
+ *
+ * fd_install(fd, f);
+ * return take_fd(fd);
+ */
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
extern void fd_install(unsigned int fd, struct file *file);
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0f12cf01070e..5669da513cd7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1208,18 +1208,18 @@ static inline bool bpf_jit_kallsyms_enabled(void)
return false;
}
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+int __bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym);
bool is_bpf_text_address(unsigned long addr);
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
-static inline const char *
+static inline int
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
- const char *ret = __bpf_address_lookup(addr, size, off, sym);
+ int ret = __bpf_address_lookup(addr, size, off, sym);
if (ret && modname)
*modname = NULL;
@@ -1263,11 +1263,11 @@ static inline bool bpf_jit_kallsyms_enabled(void)
return false;
}
-static inline const char *
+static inline int
__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
- return NULL;
+ return 0;
}
static inline bool is_bpf_text_address(unsigned long addr)
@@ -1286,11 +1286,11 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
return NULL;
}
-static inline const char *
+static inline int
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
- return NULL;
+ return 0;
}
static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..fd34b5755c0b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -125,8 +125,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_EXEC ((__force fmode_t)(1 << 5))
/* File writes are restricted (block device specific) */
#define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6))
+/* File supports atomic writes */
+#define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7))
-/* FMODE_* bits 7 to 8 */
+/* FMODE_* bit 8 */
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)(1 << 9))
@@ -317,6 +319,7 @@ struct readahead_control;
#define IOCB_SYNC (__force int) RWF_SYNC
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
#define IOCB_APPEND (__force int) RWF_APPEND
+#define IOCB_ATOMIC (__force int) RWF_ATOMIC
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
@@ -351,6 +354,7 @@ struct readahead_control;
{ IOCB_SYNC, "SYNC" }, \
{ IOCB_NOWAIT, "NOWAIT" }, \
{ IOCB_APPEND, "APPEND" }, \
+ { IOCB_ATOMIC, "ATOMIC"}, \
{ IOCB_EVENTFD, "EVENTFD"}, \
{ IOCB_DIRECT, "DIRECT" }, \
{ IOCB_WRITE, "WRITE" }, \
@@ -660,9 +664,13 @@ struct inode {
};
dev_t i_rdev;
loff_t i_size;
- struct timespec64 __i_atime;
- struct timespec64 __i_mtime;
- struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */
+ time64_t i_atime_sec;
+ time64_t i_mtime_sec;
+ time64_t i_ctime_sec;
+ u32 i_atime_nsec;
+ u32 i_mtime_nsec;
+ u32 i_ctime_nsec;
+ u32 i_generation;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
@@ -719,10 +727,10 @@ struct inode {
unsigned i_dir_seq;
};
- __u32 i_generation;
#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
+ /* 32-bit hole reserved for expanding i_fsnotify_mask */
struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
#endif
@@ -1538,23 +1546,27 @@ struct timespec64 inode_set_ctime_current(struct inode *inode);
static inline time64_t inode_get_atime_sec(const struct inode *inode)
{
- return inode->__i_atime.tv_sec;
+ return inode->i_atime_sec;
}
static inline long inode_get_atime_nsec(const struct inode *inode)
{
- return inode->__i_atime.tv_nsec;
+ return inode->i_atime_nsec;
}
static inline struct timespec64 inode_get_atime(const struct inode *inode)
{
- return inode->__i_atime;
+ struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode),
+ .tv_nsec = inode_get_atime_nsec(inode) };
+
+ return ts;
}
static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
struct timespec64 ts)
{
- inode->__i_atime = ts;
+ inode->i_atime_sec = ts.tv_sec;
+ inode->i_atime_nsec = ts.tv_nsec;
return ts;
}
@@ -1563,28 +1575,32 @@ static inline struct timespec64 inode_set_atime(struct inode *inode,
{
struct timespec64 ts = { .tv_sec = sec,
.tv_nsec = nsec };
+
return inode_set_atime_to_ts(inode, ts);
}
static inline time64_t inode_get_mtime_sec(const struct inode *inode)
{
- return inode->__i_mtime.tv_sec;
+ return inode->i_mtime_sec;
}
static inline long inode_get_mtime_nsec(const struct inode *inode)
{
- return inode->__i_mtime.tv_nsec;
+ return inode->i_mtime_nsec;
}
static inline struct timespec64 inode_get_mtime(const struct inode *inode)
{
- return inode->__i_mtime;
+ struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode),
+ .tv_nsec = inode_get_mtime_nsec(inode) };
+ return ts;
}
static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
struct timespec64 ts)
{
- inode->__i_mtime = ts;
+ inode->i_mtime_sec = ts.tv_sec;
+ inode->i_mtime_nsec = ts.tv_nsec;
return ts;
}
@@ -1598,23 +1614,27 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
static inline time64_t inode_get_ctime_sec(const struct inode *inode)
{
- return inode->__i_ctime.tv_sec;
+ return inode->i_ctime_sec;
}
static inline long inode_get_ctime_nsec(const struct inode *inode)
{
- return inode->__i_ctime.tv_nsec;
+ return inode->i_ctime_nsec;
}
static inline struct timespec64 inode_get_ctime(const struct inode *inode)
{
- return inode->__i_ctime;
+ struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode),
+ .tv_nsec = inode_get_ctime_nsec(inode) };
+
+ return ts;
}
static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
struct timespec64 ts)
{
- inode->__i_ctime = ts;
+ inode->i_ctime_sec = ts.tv_sec;
+ inode->i_ctime_nsec = ts.tv_nsec;
return ts;
}
@@ -1926,6 +1946,8 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
extern bool may_open_dev(const struct path *path);
umode_t mode_strip_sgid(struct mnt_idmap *idmap,
const struct inode *dir, umode_t mode);
+bool in_group_or_capable(struct mnt_idmap *idmap,
+ const struct inode *inode, vfsgid_t vfsgid);
/*
* This is the "filldir" function type, used by readdir() to let
@@ -2685,7 +2707,7 @@ static inline struct file *file_clone_open(struct file *file)
}
extern int filp_close(struct file *, fl_owner_t id);
-extern struct filename *getname_flags(const char __user *, int, int *);
+extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
@@ -3029,7 +3051,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *),
int (*set)(struct inode *, void *),
void *data);
-extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
+struct inode *iget5_locked(struct super_block *, unsigned long,
+ int (*test)(struct inode *, void *),
+ int (*set)(struct inode *, void *), void *);
+struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
+ int (*test)(struct inode *, void *),
+ int (*set)(struct inode *, void *), void *);
extern struct inode * iget_locked(struct super_block *, unsigned long);
extern struct inode *find_inode_nowait(struct super_block *,
unsigned long,
@@ -3231,6 +3258,9 @@ extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -3351,6 +3381,10 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_check_addressable(unsigned, u64);
extern void generic_set_sb_d_ops(struct super_block *sb);
+extern int generic_ci_match(const struct inode *parent,
+ const struct qstr *name,
+ const struct qstr *folded_name,
+ const u8 *de_name, u32 de_name_len);
static inline bool sb_has_encoding(const struct super_block *sb)
{
@@ -3403,7 +3437,8 @@ static inline int iocb_flags(struct file *file)
return res;
}
-static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
+static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
+ int rw_type)
{
int kiocb_flags = 0;
@@ -3422,6 +3457,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
return -EOPNOTSUPP;
kiocb_flags |= IOCB_NOIO;
}
+ if (flags & RWF_ATOMIC) {
+ if (rw_type != WRITE)
+ return -EOPNOTSUPP;
+ if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
+ return -EOPNOTSUPP;
+ }
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
@@ -3436,20 +3477,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
return 0;
}
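From userspace the new flag is requested per write. A hedged sketch using pwritev2(); per the checks above, the kernel rejects RWF_ATOMIC unless the file was opened with atomic-write support (FMODE_CAN_ATOMIC_WRITE), which in practice means a direct-I/O open:

	struct iovec iov = { .iov_base = buf, .iov_len = 4096 };
	ssize_t ret = pwritev2(fd, &iov, 1, 0, RWF_ATOMIC);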
-static inline ino_t parent_ino(struct dentry *dentry)
-{
- ino_t res;
-
- /*
- * Don't strictly need d_lock here? If the parent ino could change
- * then surely we'd have a deeper race in the caller?
- */
- spin_lock(&dentry->d_lock);
- res = dentry->d_parent->d_inode->i_ino;
- spin_unlock(&dentry->d_lock);
- return res;
-}
-
/* Transaction based IO helpers */
/*
@@ -3574,7 +3601,7 @@ static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
{
return ctx->actor(ctx, "..", 2, ctx->pos,
- parent_ino(file->f_path.dentry), DT_DIR);
+ d_parent_ino(file->f_path.dentry), DT_DIR);
}
static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
{
@@ -3613,4 +3640,23 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
int advice);
+static inline bool vfs_empty_path(int dfd, const char __user *path)
+{
+ char c;
+
+ if (dfd < 0)
+ return false;
+
+ /* We now allow NULL to be used for empty path. */
+ if (!path)
+ return true;
+
+ if (unlikely(get_user(c, path)))
+ return false;
+
+ return !c;
+}
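A hedged sketch of the intended use: a path-based syscall can fall through to its fd-based variant when userspace passes an empty or NULL path; the callee name is illustrative:

	if (vfs_empty_path(dfd, pathname))
		return handle_fd_directly(dfd);	/* fd-based fast path */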
+
+bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+
#endif /* _LINUX_FS_H */
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index d3350979115f..6cf713a7e6c6 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *,
*/
fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
- fs_param_is_path, fs_param_is_fd;
+ fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid;
/*
* Specification of the type of value a parameter wants.
@@ -57,6 +57,8 @@ struct fs_parse_result {
int int_32; /* For spec_s32/spec_enum */
unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */
u64 uint_64; /* For spec_u64 */
+ kuid_t uid;
+ kgid_t gid;
};
};
@@ -131,6 +133,8 @@ static inline bool fs_validate_description(const char *name,
#define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
#define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
+#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL)
+#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL)
/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index bdf7f3eddf0a..4c91a019972b 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -19,6 +19,7 @@
enum fscache_cache_trace;
enum fscache_cookie_trace;
enum fscache_access_trace;
+enum fscache_volume_trace;
enum fscache_cache_state {
FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */
@@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie);
extern void fscache_io_error(struct fscache_cache *cache);
+extern struct fscache_volume *
+fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
+extern void fscache_put_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
extern void fscache_end_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 4da80e92f804..278620e063ab 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
{
const struct path *path;
- if (file->f_mode & FMODE_NONOTIFY)
+ /*
+ * FMODE_NONOTIFY fds are generated by fanotify itself and should not
+ * generate new events. We also don't want to generate events for
+ * FMODE_PATH fds (only open & close events would apply) as they
+ * represent handle creation / destruction and not "real" file events.
+ */
+ if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
return 0;
path = &file->f_path;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 800995c425e0..b792274189a3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -86,15 +86,15 @@ struct ftrace_hash;
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
defined(CONFIG_DYNAMIC_FTRACE)
-const char *
+int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym);
#else
-static inline const char *
+static inline int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
- return NULL;
+ return 0;
}
#endif
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 9709537370ee..424acb98c7c2 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -960,8 +960,6 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr);
#define builtin_i2c_driver(__i2c_driver) \
builtin_driver(__i2c_driver, i2c_add_driver)
-#endif /* I2C */
-
/* must call put_device() when done with returned i2c_client device */
struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode);
@@ -971,6 +969,28 @@ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode);
/* must call i2c_put_adapter() when done with returned i2c_adapter device */
struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode);
+#else /* I2C */
+
+static inline struct i2c_client *
+i2c_find_device_by_fwnode(struct fwnode_handle *fwnode)
+{
+ return NULL;
+}
+
+static inline struct i2c_adapter *
+i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode)
+{
+ return NULL;
+}
+
+static inline struct i2c_adapter *
+i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode)
+{
+ return NULL;
+}
+
+#endif /* !I2C */
+
#if IS_ENABLED(CONFIG_OF)
/* must call put_device() when done with returned i2c_client device */
static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b48570eaa449..3bb6198d1523 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -50,7 +50,7 @@ struct io_wq_work_list {
struct io_wq_work {
struct io_wq_work_node list;
- unsigned flags;
+ atomic_t flags;
/* place it here instead of io_kiocb as it fills padding and saves 4B */
int cancel_seq;
};
@@ -207,18 +207,9 @@ struct io_submit_state {
bool need_plug;
bool cq_flush;
unsigned short submit_nr;
- unsigned int cqes_count;
struct blk_plug plug;
};
-struct io_ev_fd {
- struct eventfd_ctx *cq_ev_fd;
- unsigned int eventfd_async: 1;
- struct rcu_head rcu;
- atomic_t refs;
- atomic_t ops;
-};
-
struct io_alloc_cache {
void **entries;
unsigned int nr_cached;
@@ -373,7 +364,6 @@ struct io_ring_ctx {
struct io_restriction restrictions;
/* slow path rsrc auxilary data, used by update/register */
- struct io_mapped_ubuf *dummy_ubuf;
struct io_rsrc_data *file_data;
struct io_rsrc_data *buf_data;
@@ -406,6 +396,9 @@ struct io_ring_ctx {
struct callback_head poll_wq_task_work;
struct list_head defer_list;
+ struct io_alloc_cache msg_cache;
+ spinlock_t msg_lock;
+
#ifdef CONFIG_NET_RX_BUSY_POLL
struct list_head napi_list; /* track busy poll napi_id */
spinlock_t napi_lock; /* napi_list lock */
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 1068a7318d89..75a2fb8b16c3 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -21,6 +21,8 @@ enum kcov_mode {
KCOV_MODE_TRACE_PC = 2,
/* Collecting comparison operands mode. */
KCOV_MODE_TRACE_CMP = 3,
+ /* The process owns a KCOV remote reference. */
+ KCOV_MODE_REMOTE = 4,
};
#define KCOV_IN_CTXSW (1 << 30)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 13fb41d25da6..7d3bd7c9664a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1249,6 +1249,7 @@ extern int ata_slave_link_init(struct ata_port *ap);
extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
struct ata_port_info *, struct Scsi_Host *);
extern void ata_port_probe(struct ata_port *ap);
+extern void ata_port_free(struct ata_port *ap);
extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
extern void ata_sas_tport_delete(struct ata_port *ap);
int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim,
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index f804b76cde44..44488b1ab9a9 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -413,7 +413,7 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
#ifdef CONFIG_AUDIT
LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr,
- void **lsmrule)
+ void **lsmrule, gfp_t gfp)
LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule)
LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule)
LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5df52e15f7d6..d45bfb7cf81d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2029,7 +2029,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 pcc_ifa2[0x1];
u8 reserved_at_3f1[0xf];
- u8 reserved_at_400[0x400];
+ u8 reserved_at_400[0x40];
+
+ u8 reserved_at_440[0x8];
+ u8 max_num_eqs_24b[0x18];
+ u8 reserved_at_460[0x3a0];
};
enum mlx5_ifc_flow_destination_type {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9849dfda44d4..eb7c96d24ac0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -406,6 +406,11 @@ extern unsigned int kobjsize(const void *objp);
#define VM_ALLOW_ANY_UNCACHED VM_NONE
#endif
+#ifdef CONFIG_64BIT
+/* VM is sealed, in vm_flags */
+#define VM_SEALED _BITUL(63)
+#endif
+
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
@@ -3776,14 +3781,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
static inline bool want_init_on_free(void)
{
return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
- &init_on_free);
-}
-
-DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
-static inline bool want_init_mlocked_on_free(void)
-{
- return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON,
- &init_mlocked_on_free);
+ &init_on_free);
}
extern bool _debug_pagealloc_enabled_early;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8f9c9590a42c..1dc6248feb83 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -654,13 +654,12 @@ enum zone_watermarks {
};
/*
- * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list
- * for THP which will usually be GFP_MOVABLE. Even if it is another type,
- * it should not contribute to serious fragmentation causing THP allocation
- * failures.
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists
+ * are added for THP. One PCP list is used by GFP_MOVABLE, and the other PCP list
+ * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define NR_PCP_THP 1
+#define NR_PCP_THP 2
#else
#define NR_PCP_THP 0
#endif
@@ -1980,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn)
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
+ struct mem_section_usage *usage = READ_ONCE(ms->usage);
- return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
+ return usage ? test_bit(idx, usage->subsection_map) : 0;
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
diff --git a/include/linux/module.h b/include/linux/module.h
index ffa1c603163c..330ffb59efe5 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -931,11 +931,11 @@ int module_kallsyms_on_each_symbol(const char *modname,
* least KSYM_NAME_LEN long: a pointer to namebuf is returned if
* found, otherwise NULL.
*/
-const char *module_address_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname, const unsigned char **modbuildid,
- char *namebuf);
+int module_address_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, const unsigned char **modbuildid,
+ char *namebuf);
int lookup_module_symbol_name(unsigned long addr, char *symname);
int lookup_module_symbol_attrs(unsigned long addr,
unsigned long *size,
@@ -964,14 +964,14 @@ static inline int module_kallsyms_on_each_symbol(const char *modname,
}
/* For kallsyms to ask for address resolution. NULL means not found. */
-static inline const char *module_address_lookup(unsigned long addr,
+static inline int module_address_lookup(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset,
char **modname,
const unsigned char **modbuildid,
char *namebuf)
{
- return NULL;
+ return 0;
}
static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 967aa9ea9f96..8ec8fed3bce8 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -50,13 +50,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
extern int path_pts(struct path *path);
-extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
-
-static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
- struct path *path)
-{
- return user_path_at_empty(dfd, name, flags, path, NULL);
-}
+extern int user_path_at(int, const char __user *, unsigned, struct path *);
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct dentry *base,
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 5601d14e2886..dab6a1734a22 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,6 +42,17 @@ struct nsproxy {
};
extern struct nsproxy init_nsproxy;
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns->ns), \
+ struct ipc_namespace *: &(__ns->ns), \
+ struct net *: &(__ns->ns), \
+ struct pid_namespace *: &(__ns->ns), \
+ struct mnt_namespace *: &(__ns->ns), \
+ struct time_namespace *: &(__ns->ns), \
+ struct user_namespace *: &(__ns->ns), \
+ struct uts_namespace *: &(__ns->ns))
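_Generic dispatches on the pointer type at compile time, so each namespace flavour resolves to its embedded struct ns_common with no runtime cost. A one-line usage sketch:

	struct ns_common *nsc = to_ns_common(tsk->nsproxy->net_ns);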
+
/*
* A structure to encompass all bits needed to install
* a partial or complete new set of namespaces.
@@ -112,4 +123,6 @@ static inline void get_nsproxy(struct nsproxy *ns)
refcount_inc(&ns->count);
}
+DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T))
+
#endif
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 1d43371fafd2..eb19503604fe 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -15,6 +15,11 @@
#define NUMA_NO_NODE (-1)
#define NUMA_NO_MEMBLK (-1)
+static inline bool numa_valid_node(int nid)
+{
+ return nid >= 0 && nid < MAX_NUMNODES;
+}
+
/* optionally keep NUMA memory info available post init */
#ifdef CONFIG_NUMA_KEEP_MEMINFO
#define __initdata_or_meminfo
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 4109f1bd6128..89ea1ebd975a 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -920,6 +920,9 @@ struct nvmet_fc_target_port {
* further references to hosthandle.
* Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host().
*
+ * @host_traddr: called by the transport to retrieve the node name and
+ * port name of the host port address.
+ *
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
@@ -975,6 +978,7 @@ struct nvmet_fc_target_template {
void (*ls_abort)(struct nvmet_fc_target_port *targetport,
void *hosthandle, struct nvmefc_ls_req *lsreq);
void (*host_release)(void *hosthandle);
+ int (*host_traddr)(void *hosthandle, u64 *wwnn, u64 *wwpn);
u32 max_hw_queues;
u16 max_sgl_segments;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 425573202295..c12a329dd463 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -25,6 +25,9 @@
#define NVME_NSID_ALL 0xffffffff
+/* Special NSSR value, 'NVMe' */
+#define NVME_SUBSYS_RESET 0x4E564D65
+
enum nvme_subsys_type {
/* Referral to another discovery type target subsystem */
NVME_NQN_DISC = 1,
@@ -85,10 +88,11 @@ enum {
enum {
NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */
NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */
+ NVMF_RDMA_QPTYPE_INVALID = 0xff,
};
-/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
- * RDMA_QPTYPE field
+/* RDMA Provider Type codes for Discovery Log Page entry TSAS
+ * RDMA_PRTYPE field
*/
enum {
NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */
@@ -110,6 +114,7 @@ enum {
NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+ NVMF_TCP_SECTYPE_INVALID = 0xff,
};
#define NVME_AQ_DEPTH 32
@@ -1846,6 +1851,7 @@ enum {
/*
* Generic Command Status:
*/
+ NVME_SCT_GENERIC = 0x0,
NVME_SC_SUCCESS = 0x0,
NVME_SC_INVALID_OPCODE = 0x1,
NVME_SC_INVALID_FIELD = 0x2,
@@ -1893,6 +1899,7 @@ enum {
/*
* Command Specific Status:
*/
+ NVME_SCT_COMMAND_SPECIFIC = 0x100,
NVME_SC_CQ_INVALID = 0x100,
NVME_SC_QID_INVALID = 0x101,
NVME_SC_QUEUE_SIZE = 0x102,
@@ -1966,6 +1973,7 @@ enum {
/*
* Media and Data Integrity Errors:
*/
+ NVME_SCT_MEDIA_ERROR = 0x200,
NVME_SC_WRITE_FAULT = 0x280,
NVME_SC_READ_ERROR = 0x281,
NVME_SC_GUARD_CHECK = 0x282,
@@ -1978,6 +1986,7 @@ enum {
/*
* Path-related Errors:
*/
+ NVME_SCT_PATH = 0x300,
NVME_SC_INTERNAL_PATH_ERROR = 0x300,
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
NVME_SC_ANA_INACCESSIBLE = 0x302,
@@ -1986,11 +1995,17 @@ enum {
NVME_SC_HOST_PATH_ERROR = 0x370,
NVME_SC_HOST_ABORTED_CMD = 0x371,
- NVME_SC_CRD = 0x1800,
- NVME_SC_MORE = 0x2000,
- NVME_SC_DNR = 0x4000,
+ NVME_SC_MASK = 0x00ff, /* Status Code */
+ NVME_SCT_MASK = 0x0700, /* Status Code Type */
+ NVME_SCT_SC_MASK = NVME_SCT_MASK | NVME_SC_MASK,
+
+ NVME_STATUS_CRD = 0x1800, /* Command Retry Delayed */
+ NVME_STATUS_MORE = 0x2000,
+ NVME_STATUS_DNR = 0x4000, /* Do Not Retry */
};
+#define NVME_SCT(status) ((status) >> 8 & 7)
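A worked example of the new masks: NVME_SC_READ_ERROR | NVME_STATUS_DNR is 0x4281; NVME_SCT(0x4281) is (0x42 & 7) = 2, the media-error status code type, while status & NVME_SC_MASK is 0x81, and the DNR bit marks the command as not retryable:

	u16 status = NVME_SC_READ_ERROR | NVME_STATUS_DNR;	/* 0x4281 */
	/* NVME_SCT(status) == 2; status & NVME_SC_MASK == 0x81 */
	bool retryable = !(status & NVME_STATUS_DNR);		/* false */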
+
struct nvme_completion {
/*
* Used by Admin and Fabrics commands to return data:
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 104078afe0b1..b9e914e1face 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -944,15 +944,18 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
* mistaken for a page type value.
*/
-#define PAGE_TYPE_BASE 0xf0000000
-/* Reserve 0x0000007f to catch underflows of _mapcount */
-#define PAGE_MAPCOUNT_RESERVE -128
-#define PG_buddy 0x00000080
-#define PG_offline 0x00000100
-#define PG_table 0x00000200
-#define PG_guard 0x00000400
-#define PG_hugetlb 0x00000800
-#define PG_slab 0x00001000
+enum pagetype {
+ PG_buddy = 0x00000080,
+ PG_offline = 0x00000100,
+ PG_table = 0x00000200,
+ PG_guard = 0x00000400,
+ PG_hugetlb = 0x00000800,
+ PG_slab = 0x00001000,
+
+ PAGE_TYPE_BASE = 0xf0000000,
+ /* Reserve 0x0000007f to catch underflows of _mapcount */
+ PAGE_MAPCOUNT_RESERVE = -128,
+};
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 1acf5bac7f50..8c236c651d1d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- bool ret = atomic_add_unless(&page->_refcount, nr, u);
+ bool ret = false;
+
+ rcu_read_lock();
+ /* avoid writing to the vmemmap area being remapped */
+ if (!page_is_fake_head(page) && page_ref_count(page) != u)
+ ret = atomic_add_unless(&page->_refcount, nr, u);
+ rcu_read_unlock();
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
@@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio)
return folio_ref_add_unless(folio, 1, 0);
}
-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
-{
-#ifdef CONFIG_TINY_RCU
- /*
- * The caller guarantees the folio will not be freed from interrupt
- * context, so (on !SMP) we only need preemption to be disabled
- * and TINY_RCU does that for us.
- */
-# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
- VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
- folio_ref_add(folio, count);
-#else
- if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
- /* Either the folio has been freed, or will be freed. */
- return false;
- }
-#endif
- return true;
-}
-
-/**
- * folio_try_get_rcu - Attempt to increase the refcount on a folio.
- * @folio: The folio.
- *
- * This is a version of folio_try_get() optimised for non-SMP kernels.
- * If you are still holding the rcu_read_lock() after looking up the
- * page and know that the page cannot have its refcount decreased to
- * zero in interrupt context, you can use this instead of folio_try_get().
- *
- * Example users include get_user_pages_fast() (as pages are not unmapped
- * from interrupt context) and the page cache lookups (as pages are not
- * truncated from interrupt context). We also know that pages are not
- * frozen in interrupt context for the purposes of splitting or migration.
- *
- * You can also use this function if you're holding a lock that prevents
- * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_lock or page table lock for page tables. In this case,
- * it will always succeed, and you could have used a plain folio_get(),
- * but it's sometimes more convenient to have a common function called
- * from both locked and RCU-protected contexts.
- *
- * Return: True if the reference count was successfully incremented.
- */
-static inline bool folio_try_get_rcu(struct folio *folio)
+static inline bool folio_ref_try_add(struct folio *folio, int count)
{
- return folio_ref_try_add_rcu(folio, 1);
+ return folio_ref_add_unless(folio, count, 0);
}
static inline int page_ref_freeze(struct page *page, int count)
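
With folio_try_get_rcu() gone, an RCU-protected lookup reduces to plain folio_try_get(); page_ref_add_unless() now takes rcu_read_lock() itself so it never writes to a vmemmap page being remapped. A hedged caller-side sketch (lookup function hypothetical):

rcu_read_lock();
folio = hypothetical_lookup(mapping, index);
if (folio && !folio_try_get(folio))
	folio = NULL;	/* being freed or frozen; the caller retries */
rcu_read_unlock();
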
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ee633712bba0..a0a026d2d244 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
-#define MAX_PAGECACHE_ORDER 8
+#define PREFERRED_MAX_PAGECACHE_ORDER 8
#endif
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
* @mapping: The file.
@@ -381,6 +388,10 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
*/
static inline bool mapping_large_folio_support(struct address_space *mapping)
{
+ /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */
+ VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON,
+ "Anonymous mapping always supports large folio");
+
return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
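
Worked example, assuming the default XA_CHUNK_SHIFT of 6: MAX_XAS_ORDER = 6 * 2 - 1 = 11. On arm64 with a 64KB base page, HPAGE_PMD_ORDER is 13 (a 512MB PMD), so the cap lowers MAX_PAGECACHE_ORDER from 13 to 11, i.e. 128MB folios at most; with 4KB pages HPAGE_PMD_ORDER is 9 and the cap changes nothing.
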
diff --git a/include/linux/path.h b/include/linux/path.h
index 475225a03d0d..ca073e70decd 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path)
*path = (struct path) { };
}
+/*
+ * Cleanup macro for use with __free(path_put). Avoids dereferencing and
+ * copying @path, unlike DEFINE_FREE(). path_put() handles the empty path
+ * correctly; just ensure @path is initialized:
+ *
+ * struct path path __free(path_put) = {};
+ */
+#define __free_path_put path_put
+
#endif /* _LINUX_PATH_H */
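
A hedged sketch of the cleanup pattern the comment describes (the function and do_something() are hypothetical):

static int example_user(const char *name)
{
	struct path path __free(path_put) = {};
	int err;

	err = kern_path(name, LOOKUP_FOLLOW, &path);
	if (err)
		return err;	/* path_put() on the empty path is a no-op */

	return do_something(&path);	/* path_put() runs on every return */
}
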
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 86ba5d33e43b..9cacadbd61f8 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -37,6 +37,9 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page)
static inline void put_page_tag_ref(union codetag_ref *ref)
{
+ if (WARN_ON(!ref))
+ return;
+
page_ext_put(page_ext_from_codetag_ref(ref));
}
@@ -102,9 +105,11 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
union codetag_ref *ref = get_page_tag_ref(page);
alloc_tag_sub_check(ref);
- if (ref && ref->ct)
- tag = ct_to_alloc_tag(ref->ct);
- put_page_tag_ref(ref);
+ if (ref) {
+ if (ref->ct)
+ tag = ct_to_alloc_tag(ref->ct);
+ put_page_tag_ref(ref);
+ }
}
return tag;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e6e83304558e..3be430cf3132 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1122,7 +1122,7 @@ struct phy_driver {
u8 index, enum led_brightness value);
/**
- * @led_blink_set: Set a PHY LED brightness. Index indicates
+ * @led_blink_set: Set a PHY LED blinking. Index indicates
* which of the PHY's LEDs should be configured to blink. Delays
* are in milliseconds and if both are zero then a sensible
* default should be chosen. The call should adjust the
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 40afab23881a..65c5184470f1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -60,9 +60,6 @@ static inline const char *printk_skip_headers(const char *buffer)
#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET
-int add_preferred_console_match(const char *match, const char *name,
- const short idx);
-
extern int console_printk[];
#define console_loglevel (console_printk[0])
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..a5f4b48fca18 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2192,13 +2192,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
#ifdef CONFIG_MEM_ALLOC_PROFILING
-static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
swap(current->alloc_tag, tag);
return tag;
}
-static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
diff --git a/include/linux/security.h b/include/linux/security.h
index 21cf70346b33..de3af33e6ff5 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2048,7 +2048,8 @@ static inline void security_key_post_create_or_update(struct key *keyring,
#ifdef CONFIG_AUDIT
#ifdef CONFIG_SECURITY
-int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule);
+int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule,
+ gfp_t gfp);
int security_audit_rule_known(struct audit_krule *krule);
int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule);
void security_audit_rule_free(void *lsmrule);
@@ -2056,7 +2057,7 @@ void security_audit_rule_free(void *lsmrule);
#else
static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr,
- void **lsmrule)
+ void **lsmrule, gfp_t gfp)
{
return 0;
}
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 8cb65f50e830..aea25eef9a1a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -811,8 +811,7 @@ enum UART_TX_FLAGS {
if (pending < WAKEUP_CHARS) { \
uart_write_wakeup(__port); \
\
- if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \
- __port->ops->tx_empty(__port)) \
+ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \
__port->ops->stop_tx(__port); \
} \
\
@@ -852,6 +851,24 @@ enum UART_TX_FLAGS {
})
/**
+ * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting and flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @count: a limit of characters to send
+ * @tx_ready: function to check whether the HW can accept more data
+ * @put_char: function to write a character
+ * @tx_done: function to call after the loop is done
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \
+ unsigned int __count = (count); \
+ __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \
+ __count--); \
+})
+
+/**
* uart_port_tx -- transmit helper for uart_port
* @port: uart port
* @ch: variable to store a character to be written to the HW
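
A hedged sketch of a driver TX routine built on the new helper (the foo_* accessors are hypothetical):

static void foo_tx_chars(struct uart_port *port)
{
	u8 ch;

	uart_port_tx_limited_flags(port, ch, UART_TX_NOSTOP,
		port->fifosize,
		foo_tx_ready(port),		/* true while the FIFO has room */
		foo_put_char(port, ch),		/* push one byte to the HW */
		({}));				/* nothing to do afterwards */
}
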
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 89d16b90370b..c1f16cdab677 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
extern int __sys_socket(int family, int type, int protocol);
extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
+ int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
int addrlen);
extern int __sys_listen(int fd, int backlog);
+extern int __sys_listen_socket(struct socket *sock, int backlog);
extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len);
extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e8e1e798924f..67b9a15a5330 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -533,6 +533,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @queue_empty: signal green light for opportunistically skipping the queue
* for spi_sync transfers.
* @must_async: disable all fast paths in the core
+ * @defer_optimize_message: set to true if the controller cannot pre-optimize messages
+ * and needs to defer the optimization step until the message is actually
+ * being transferred
*
* Each SPI controller can communicate with one or more @spi_device
* children. These make a small bus, sharing MOSI, MISO and SCK signals
@@ -776,6 +779,7 @@ struct spi_controller {
/* Flag for enabling opportunistic skipping of the queue in spi_sync */
bool queue_empty;
bool must_async;
+ bool defer_optimize_message;
};
static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
@@ -1085,12 +1089,13 @@ struct spi_transfer {
unsigned dummy_data:1;
unsigned cs_off:1;
unsigned cs_change:1;
- unsigned tx_nbits:3;
- unsigned rx_nbits:3;
+ unsigned tx_nbits:4;
+ unsigned rx_nbits:4;
unsigned timestamped:1;
#define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */
#define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */
#define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */
+#define SPI_NBITS_OCTAL 0x08 /* 8-bit transfer */
u8 bits_per_word;
struct spi_delay delay;
struct spi_delay cs_change_delay;
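
With tx_nbits/rx_nbits widened to four bits, an eight-line transfer can now be described; a hedged sketch (controller and device support assumed):

struct spi_transfer xfer = {
	.tx_buf = buf,
	.len = len,
	.tx_nbits = SPI_NBITS_OCTAL,	/* data on eight lines */
};
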
diff --git a/include/linux/stat.h b/include/linux/stat.h
index bf92441dbad2..3d900c86981c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -54,6 +54,9 @@ struct kstat {
u32 dio_offset_align;
u64 change_cookie;
u64 subvol;
+ u32 atomic_write_unit_min;
+ u32 atomic_write_unit_max;
+ u32 atomic_write_segments_max;
};
/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
diff --git a/include/linux/string.h b/include/linux/string.h
index 60168aa2af07..9edace076ddb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -289,7 +289,7 @@ extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_si
extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+extern void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp)
__realloc_size(2, 3);
/* lib/argv_split.c */
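
The argument order now mirrors kcalloc(): count before element size. A call-site sketch:

/* previously: kmemdup_array(src, sizeof(*src), n, gfp) */
copy = kmemdup_array(src, n, sizeof(*src), GFP_KERNEL);
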
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..e685e93ba354 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
}
/* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9104952d323d..fff820c3e93e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -322,13 +322,13 @@ asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
long nr,
struct io_event __user *events,
struct __kernel_timespec __user *timeout,
- const struct __aio_sigset *sig);
+ const struct __aio_sigset __user *sig);
asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id,
long min_nr,
long nr,
struct io_event __user *events,
struct old_timespec32 __user *timeout,
- const struct __aio_sigset *sig);
+ const struct __aio_sigset __user *sig);
asmlinkage long sys_io_uring_setup(u32 entries,
struct io_uring_params __user *p);
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
@@ -418,7 +418,7 @@ asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
u64 __user *mnt_ids, size_t nr_mnt_ids,
unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
-asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
+asmlinkage long sys_ftruncate(unsigned int fd, off_t length);
#if BITS_PER_LONG == 32
asmlinkage long sys_truncate64(const char __user *path, loff_t length);
asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
@@ -441,7 +441,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
umode_t mode);
asmlinkage long sys_openat2(int dfd, const char __user *filename,
- struct open_how *how, size_t size);
+ struct open_how __user *how, size_t size);
asmlinkage long sys_close(unsigned int fd);
asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags);
@@ -555,7 +555,7 @@ asmlinkage long sys_get_robust_list(int pid,
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);
-asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
+asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters,
unsigned int nr_futexes, unsigned int flags,
struct __kernel_timespec __user *timeout, clockid_t clockid);
@@ -859,9 +859,15 @@ asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource,
const struct rlimit64 __user *new_rlim,
struct rlimit64 __user *old_rlim);
asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags);
+#if defined(CONFIG_ARCH_SPLIT_ARG64)
+asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
+ unsigned int mask_1, unsigned int mask_2,
+ int dfd, const char __user * pathname);
+#else
asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
u64 mask, int fd,
const char __user *pathname);
+#endif
asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
struct file_handle __user *handle,
int __user *mnt_id, int flag);
@@ -907,7 +913,7 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
asmlinkage long sys_getrandom(char __user *buf, size_t count,
unsigned int flags);
asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
asmlinkage long sys_execveat(int dfd, const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp, int flags);
@@ -960,11 +966,11 @@ asmlinkage long sys_cachestat(unsigned int fd,
struct cachestat_range __user *cstat_range,
struct cachestat __user *cstat, unsigned int flags);
asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
-asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx,
- u32 *size, u32 flags);
-asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx,
+ u32 __user *size, u32 flags);
+asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx,
u32 size, u32 flags);
-asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags);
+asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags);
/*
* Architecture-specific system calls
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 248f4ac95642..2c59fe3efcd4 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -41,18 +41,12 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
{
unsigned int shift = ilog2(queue_logical_block_size(rq->q));
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- if (rq->q->integrity.interval_exp)
- shift = rq->q->integrity.interval_exp;
-#endif
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ rq->q->limits.integrity.interval_exp)
+ shift = rq->q->limits.integrity.interval_exp;
return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
}
-extern const struct blk_integrity_profile t10_pi_type1_crc;
-extern const struct blk_integrity_profile t10_pi_type1_ip;
-extern const struct blk_integrity_profile t10_pi_type3_crc;
-extern const struct blk_integrity_profile t10_pi_type3_ip;
-
struct crc64_pi_tuple {
__be64 guard_tag;
__be16 app_tag;
@@ -72,14 +66,10 @@ static inline u64 ext_pi_ref_tag(struct request *rq)
{
unsigned int shift = ilog2(queue_logical_block_size(rq->q));
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- if (rq->q->integrity.interval_exp)
- shift = rq->q->integrity.interval_exp;
-#endif
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ rq->q->limits.integrity.interval_exp)
+ shift = rq->q->limits.integrity.interval_exp;
return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
}
-extern const struct blk_integrity_profile ext_pi_type1_crc64;
-extern const struct blk_integrity_profile ext_pi_type3_crc64;
-
#endif
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 21a67dc9efe8..e93ee8d936a9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -490,9 +490,16 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
{
}
#endif
+
+static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
+{
#ifdef CONFIG_TCG_TPM2_HMAC
+ return chip->auth;
+#else
+ return NULL;
+#endif
+}
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
@@ -504,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
u8 *passphrase,
int passphraselen)
{
- tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
- passphraselen);
+ struct tpm_header *head;
+ int offset;
+
+ if (tpm2_chip_auth(chip)) {
+ tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen);
+ } else {
+ offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ head = (struct tpm_header *)buf->data;
+
+ /*
+ * If the only sessions are optional, the command tag must change to
+ * TPM2_ST_NO_SESSIONS.
+ */
+ if (tpm_buf_length(buf) == offset)
+ head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
+ }
}
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
int rc);
@@ -521,56 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_name(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- tpm_buf_append_u32(buf, handle);
- /* count the number of handles in the upper bits of flags */
- buf->handles++;
-}
-static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphraselen)
-{
- /* offset tells us where the sessions area begins */
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- u32 len = 9 + passphraselen;
-
- if (tpm_buf_length(buf) != offset) {
- /* not the first session so update the existing length */
- len += get_unaligned_be32(&buf->data[offset]);
- put_unaligned_be32(len, &buf->data[offset]);
- } else {
- tpm_buf_append_u32(buf, len);
- }
- /* auth handle */
- tpm_buf_append_u32(buf, TPM2_RS_PW);
- /* nonce */
- tpm_buf_append_u16(buf, 0);
- /* attributes */
- tpm_buf_append_u8(buf, 0);
- /* passphrase */
- tpm_buf_append_u16(buf, passphraselen);
- tpm_buf_append(buf, passphrase, passphraselen);
-}
-static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes,
- u8 *passphrase,
- int passphraselen)
-{
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- struct tpm_header *head = (struct tpm_header *) buf->data;
-
- /*
- * if the only sessions are optional, the command tag
- * must change to TPM2_ST_NO_SESSIONS
- */
- if (tpm_buf_length(buf) == offset)
- head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
-}
static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf)
{
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index fb3993894536..d9968bfc8eac 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -95,7 +95,7 @@ enum wq_misc_consts {
WORK_BUSY_RUNNING = 1 << 1,
/* maximum string length for set_worker_desc() */
- WORKER_DESC_LEN = 24,
+ WORKER_DESC_LEN = 32,
};
/* Convenience constants - of type 'unsigned long', not 'enum'! */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fe932ca3bc8c..e372a88e8c3f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -324,6 +324,17 @@ enum {
* claim to support it.
*/
HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE,
+
+ /*
+ * When this quirk is set, the reserved bits of Primary/Secondary_PHY
+ * inside the LE Extended Advertising Report events are discarded.
+ * This is required for some Apple/Broadcom controllers which
+ * abuse these reserved bits for unrelated flags.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
};
/* HCI device flags */
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 6a9d063e9f47..534c3386e714 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -38,6 +38,8 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param, u8 event, u32 timeout,
struct sock *sk);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout);
void hci_cmd_sync_init(struct hci_dev *hdev);
void hci_cmd_sync_clear(struct hci_dev *hdev);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7d6b1254c92d..c0deaafebfdc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child);
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout);
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
struct request_sock *req,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index cafc664ee531..45ad37adbe32 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -395,7 +395,7 @@ enum ieee80211_bss_change {
BSS_CHANGED_HE_OBSS_PD = 1<<28,
BSS_CHANGED_HE_BSS_COLOR = 1<<29,
BSS_CHANGED_FILS_DISCOVERY = 1<<30,
- BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
+ BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31),
BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
BSS_CHANGED_MLD_TTLM = BIT_ULL(34),
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2796153b03da..188d41da1a40 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -619,6 +619,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
return (void *)set->data;
}
+static inline enum nft_data_types nft_set_datatype(const struct nft_set *set)
+{
+ return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
+}
+
static inline bool nft_set_gc_is_pending(const struct nft_set *s)
{
return refcount_read(&s->refs) != 1;
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 02bbdc577f8e..a6a0bf4a247e 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -15,6 +15,9 @@ struct netns_nf {
const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
+#ifdef CONFIG_LWTUNNEL
+ struct ctl_table_header *nf_lwtnl_dir_header;
+#endif
#endif
struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
diff --git a/include/net/tcx.h b/include/net/tcx.h
index 72a3e75e539f..5ce0ce9e0c02 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -13,7 +13,7 @@ struct mini_Qdisc;
struct tcx_entry {
struct mini_Qdisc __rcu *miniq;
struct bpf_mprog_bundle bundle;
- bool miniq_active;
+ u32 miniq_active;
struct rcu_head rcu;
};
@@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress)
tcx_dec();
}
-static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
- const bool active)
+static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry)
{
ASSERT_RTNL();
- tcx_entry(entry)->miniq_active = active;
+ tcx_entry(entry)->miniq_active++;
+}
+
+static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry)
+{
+ ASSERT_RTNL();
+ tcx_entry(entry)->miniq_active--;
}
static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
index 6b548dc2c496..1d79a3b536ce 100644
--- a/include/scsi/scsi_devinfo.h
+++ b/include/scsi/scsi_devinfo.h
@@ -69,8 +69,10 @@
#define BLIST_RETRY_ITF ((__force blist_flags_t)(1ULL << 32))
/* Always retry ABORTED_COMMAND with ASC 0xc1 */
#define BLIST_RETRY_ASC_C1 ((__force blist_flags_t)(1ULL << 33))
+/* Do not query the IO Advice Hints Grouping mode page */
+#define BLIST_SKIP_IO_HINTS ((__force blist_flags_t)(1ULL << 34))
-#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1
+#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS
#define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \
(__force blist_flags_t) \
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 843106e1109f..70e1262b2e20 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -120,6 +120,7 @@
#define WRITE_SAME_16 0x93
#define ZBC_OUT 0x94
#define ZBC_IN 0x95
+#define WRITE_ATOMIC_16 0x9c
#define SERVICE_ACTION_BIDIRECTIONAL 0x9d
#define SERVICE_ACTION_IN_16 0x9e
#define SERVICE_ACTION_OUT_16 0x9f
diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index c11aaf8079fb..f6baa9a01868 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -36,6 +36,7 @@ snd_pcm_uframes_t snd_dmaengine_pcm_pointer_no_residue(struct snd_pcm_substream
int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream,
struct dma_chan *chan);
int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream);
+int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream);
int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream,
dma_filter_fn filter_fn, void *filter_data);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0e128ad51460..1527d5d45e01 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -9,9 +9,17 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>
+#include <uapi/linux/ioprio.h>
#define RWBS_LEN 8
+#define IOPRIO_CLASS_STRINGS \
+ { IOPRIO_CLASS_NONE, "none" }, \
+ { IOPRIO_CLASS_RT, "rt" }, \
+ { IOPRIO_CLASS_BE, "be" }, \
+ { IOPRIO_CLASS_IDLE, "idle" }, \
+ { IOPRIO_CLASS_INVALID, "invalid"}
+
#ifdef CONFIG_BUFFER_HEAD
DECLARE_EVENT_CLASS(block_buffer,
@@ -82,6 +90,7 @@ TRACE_EVENT(block_rq_requeue,
__field( dev_t, dev )
__field( sector_t, sector )
__field( unsigned int, nr_sector )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -90,16 +99,20 @@ TRACE_EVENT(block_rq_requeue,
__entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, 0)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), 0)
);
DECLARE_EVENT_CLASS(block_rq_completion,
@@ -113,6 +126,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( int , error )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -122,16 +136,20 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
__entry->error = blk_status_to_errno(error);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->error)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error)
);
/**
@@ -180,6 +198,7 @@ DECLARE_EVENT_CLASS(block_rq,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( unsigned int, bytes )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__array( char, comm, TASK_COMM_LEN )
__dynamic_array( char, cmd, 1 )
@@ -190,17 +209,21 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
- TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __entry->bytes, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm)
);
/**
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index fadf406b5260..c978fa2893a5 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2556,9 +2556,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count,
TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
- TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr),
+ TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr,
+ u64 last_root_id, u64 last_ino),
- TP_ARGS(fs_info, nr_to_scan, nr),
+ TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino),
TP_STRUCT__entry_btrfs(
__field( long, nr_to_scan )
@@ -2570,8 +2571,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
TP_fast_assign_btrfs(fs_info,
__entry->nr_to_scan = nr_to_scan;
__entry->nr = nr;
- __entry->last_root_id = fs_info->extent_map_shrinker_last_root;
- __entry->last_ino = fs_info->extent_map_shrinker_last_ino;
+ __entry->last_root_id = last_root_id;
+ __entry->last_ino = last_ino;
),
TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
@@ -2581,9 +2582,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
- TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr),
+ TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr,
+ u64 last_root_id, u64 last_ino),
- TP_ARGS(fs_info, nr_dropped, nr),
+ TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino),
TP_STRUCT__entry_btrfs(
__field( long, nr_dropped )
@@ -2595,8 +2597,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
TP_fast_assign_btrfs(fs_info,
__entry->nr_dropped = nr_dropped;
__entry->nr = nr;
- __entry->last_root_id = fs_info->extent_map_shrinker_last_root;
- __entry->last_ino = fs_info->extent_map_shrinker_last_ino;
+ __entry->last_root_id = last_root_id;
+ __entry->last_ino = last_ino;
),
TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index a6190aa1b406..f1a73aa83fbb 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -35,12 +35,14 @@ enum fscache_volume_trace {
fscache_volume_get_cookie,
fscache_volume_get_create_work,
fscache_volume_get_hash_collision,
+ fscache_volume_get_withdraw,
fscache_volume_free,
fscache_volume_new_acquire,
fscache_volume_put_cookie,
fscache_volume_put_create_work,
fscache_volume_put_hash_collision,
fscache_volume_put_relinquish,
+ fscache_volume_put_withdraw,
fscache_volume_see_create_work,
fscache_volume_see_hash_wake,
fscache_volume_wait_create_work,
@@ -120,12 +122,14 @@ enum fscache_access_trace {
EM(fscache_volume_get_cookie, "GET cook ") \
EM(fscache_volume_get_create_work, "GET creat") \
EM(fscache_volume_get_hash_collision, "GET hcoll") \
+ EM(fscache_volume_get_withdraw, "GET withd") \
EM(fscache_volume_free, "FREE ") \
EM(fscache_volume_new_acquire, "NEW acq ") \
EM(fscache_volume_put_cookie, "PUT cook ") \
EM(fscache_volume_put_create_work, "PUT creat") \
EM(fscache_volume_put_hash_collision, "PUT hcoll") \
EM(fscache_volume_put_relinquish, "PUT relnq") \
+ EM(fscache_volume_put_withdraw, "PUT withd") \
EM(fscache_volume_see_create_work, "SEE creat") \
EM(fscache_volume_see_hash_wake, "SEE hwake") \
E_(fscache_volume_wait_create_work, "WAIT crea")
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index f1b5e816e7e5..ff33f41a9db7 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset,
TP_ARGS(q),
TP_STRUCT__entry(
- __string( dev, qdisc_dev(q)->name )
+ __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h
index 8e2d9b1b0e77..05f1945ed204 100644
--- a/include/trace/events/scsi.h
+++ b/include/trace/events/scsi.h
@@ -102,6 +102,7 @@
scsi_opcode_name(WRITE_32), \
scsi_opcode_name(WRITE_SAME_32), \
scsi_opcode_name(ATA_16), \
+ scsi_opcode_name(WRITE_ATOMIC_16), \
scsi_opcode_name(ATA_12))
#define scsi_hostbyte_name(result) { result, #result }
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d983c48a3b6a..d4cc26932ff4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index aaed8e12ad0b..926b1deb1116 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -802,6 +802,9 @@ struct drm_panthor_queue_submit {
* Must be 64-bit/8-byte aligned (the size of a CS instruction)
*
* Can be zero if stream_addr is zero too.
+ *
+ * When the stream size is zero, the queue submit serves as a
+ * synchronization point.
*/
__u32 stream_size;
@@ -822,6 +825,8 @@ struct drm_panthor_queue_submit {
* ensure the GPU doesn't get garbage when reading the indirect command
* stream buffers. If you want the cache flush to happen
* unconditionally, pass a zero here.
+ *
+ * Ignored when stream_size is zero.
*/
__u32 latest_flush;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1446c3bae515..d425b83181df 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -776,7 +776,13 @@ struct drm_xe_gem_create {
#define DRM_XE_GEM_CPU_CACHING_WC 2
/**
* @cpu_caching: The CPU caching mode to select for this object. If
- * mmaping the object the mode selected here will also be used.
+ * mmapping the object, the mode selected here will also be used. The
+ * exception is when mapping system memory (including data evicted
+ * to system) on discrete GPUs. The caching mode selected will
+ * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency
+ * between GPU and CPU is guaranteed. The caching mode of
+ * existing CPU-mappings will be updated transparently to
+ * user-space clients.
*/
__u16 cpu_caching;
/** @pad: MBZ */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 45e4e64fd664..191a7e88a8ab 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -329,9 +329,12 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
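
A hedged userspace sketch of the new per-IO flag; the file is assumed to be opened with O_DIRECT on a device that advertises atomic write support, and the length must respect the statx-reported unit limits (see the statx changes below):

struct iovec iov = { .iov_base = buf, .iov_len = unit };
ssize_t ret = pwritev2(fd, &iov, 1, offset, RWF_ATOMIC);
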
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 994bf7af0efe..2aaf7ee256ac 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -257,6 +257,8 @@ enum io_uring_op {
IORING_OP_FUTEX_WAITV,
IORING_OP_FIXED_FD_INSTALL,
IORING_OP_FTRUNCATE,
+ IORING_OP_BIND,
+ IORING_OP_LISTEN,
/* this goes last, obviously */
IORING_OP_LAST,
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index ad5478dbad00..225bc366ffcb 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
*/
struct statmount {
__u32 size; /* Total size, including strings */
- __u32 __spare1;
+ __u32 mnt_opts; /* [str] Mount options of the mount */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
@@ -172,7 +172,8 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
- __u64 __spare2[50];
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u64 __spare2[49];
char str[]; /* Variable size part containing strings */
};
@@ -188,10 +189,12 @@ struct mnt_id_req {
__u32 spare;
__u64 mnt_id;
__u64 param;
+ __u64 mnt_ns_id;
};
/* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/*
* @mask bits for statmount(2)
@@ -202,10 +205,13 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
/*
* Special @mnt_id values that can be passed to listmount
*/
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif /* _UAPI_LINUX_MOUNT_H */
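
A hedged sketch of requesting the new fields via the raw statmount(2) syscall (no libc wrapper assumed):

struct mnt_id_req req = {
	.size = MNT_ID_REQ_SIZE_VER1,	/* the mnt_ns_id-capable layout */
	.mnt_id = mnt_id,
	.param = STATMOUNT_MNT_OPTS | STATMOUNT_MNT_NS_ID,
};
char buf[4096];

long ret = syscall(__NR_statmount, &req, (struct statmount *)buf,
		   sizeof(buf), 0);
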
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index a0c8552b64ee..b133211331f6 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -15,5 +15,15 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID _IO(NSIO, 0x5)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the caller's pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
#endif /* __LINUX_NSFS_H */
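
A hedged sketch of pid translation, assuming the pid to translate is passed as the ioctl argument and the translated value comes back as the return code:

int nsfd = open("/proc/1234/ns/pid", O_RDONLY);	/* target pidns */
pid_t local = ioctl(nsfd, NS_GET_PID_FROM_PIDNS, target_pid);
if (local < 0)
	/* e.g. ESRCH: pid has no mapping in the caller's namespace */;
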
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 72ec000a97cd..565fc0629fff 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/fcntl.h>
+#include <linux/ioctl.h>
/* Flags for pidfd_open(). */
#define PIDFD_NONBLOCK O_NONBLOCK
@@ -15,4 +16,17 @@
#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
+#define PIDFS_IOCTL_MAGIC 0xFF
+
+#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
+#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
+#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
+#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
+#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
+#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
+#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
+
#endif /* _UAPI_LINUX_PIDFD_H */
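
A hedged sketch of the new pidfd ioctls; each returns a fresh nsfs file descriptor for the target's namespace:

int pidfd = syscall(SYS_pidfd_open, pid, 0);
int usernsfd = ioctl(pidfd, PIDFD_GET_USER_NAMESPACE, 0);
/* usernsfd is an ordinary namespace fd, e.g. usable with setns(2) */
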
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 95770941ee2c..887a25286441 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -128,7 +128,13 @@ struct statx {
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
/* 0xa0 */
__u64 stx_subvol; /* Subvolume identifier */
- __u64 __spare3[11]; /* Spare space for future expansion */
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* 0xb0 */
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
+ __u32 __spare1[1];
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -157,6 +163,7 @@ struct statx {
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -192,6 +199,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
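
A hedged userspace sketch of probing the new geometry before issuing RWF_ATOMIC writes:

struct statx stx;

if (statx(AT_FDCWD, path, 0, STATX_WRITE_ATOMIC, &stx) == 0 &&
    (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC)) {
	/* atomic writes must be a power-of-two size between
	 * stx_atomic_write_unit_min and _max, naturally aligned */
}
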
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index bd1066754220..c102ef35d11e 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -43,6 +43,6 @@ struct trace_buffer_meta {
__u64 Reserved2;
};
-#define TRACE_MMAP_IOCTL_GET_READER _IO('T', 0x1)
+#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20)
#endif /* _TRACE_MMAP_H_ */
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index f33d914d8f46..91583690bddc 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -8,11 +8,14 @@
#define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf)
#define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32)
#define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke)
+/* This ioctl is only supported with secure device nodes */
#define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4)
#define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create)
#define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap)
#define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap)
+/* This ioctl is only supported with secure device nodes */
#define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8)
+/* This ioctl is only supported with secure device nodes */
#define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static)
#define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map)
#define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap)
diff --git a/init/Kconfig b/init/Kconfig
index 72404c1f2157..febdea2afc3b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -883,7 +883,7 @@ config GCC10_NO_ARRAY_BOUNDS
config CC_NO_ARRAY_BOUNDS
bool
- default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS
+ default y if CC_IS_GCC && GCC_VERSION >= 90000 && GCC10_NO_ARRAY_BOUNDS
# Currently, disable -Wstringop-overflow for GCC globally.
config GCC_NO_STRINGOP_OVERFLOW
diff --git a/io_uring/Makefile b/io_uring/Makefile
index fc1b23c524e8..61923e11c767 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -4,9 +4,9 @@
obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
tctx.o filetable.o rw.o net.o poll.o \
- uring_cmd.o openclose.o sqpoll.o \
- xattr.o nop.o fs.o splice.o sync.o \
- msg_ring.o advise.o openclose.o \
+ eventfd.o uring_cmd.o openclose.o \
+ sqpoll.o xattr.o nop.o fs.o splice.o \
+ sync.o msg_ring.o advise.o openclose.o \
epoll.o statx.o timeout.o fdinfo.o \
cancel.o waitid.o register.o \
truncate.o memmap.o
diff --git a/io_uring/advise.c b/io_uring/advise.c
index 7085804c513c..cb7b881665e5 100644
--- a/io_uring/advise.c
+++ b/io_uring/advise.c
@@ -17,14 +17,14 @@
struct io_fadvise {
struct file *file;
u64 offset;
- u32 len;
+ u64 len;
u32 advice;
};
struct io_madvise {
struct file *file;
u64 addr;
- u32 len;
+ u64 len;
u32 advice;
};
@@ -33,11 +33,13 @@ int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
struct io_madvise *ma = io_kiocb_to_cmd(req, struct io_madvise);
- if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
ma->addr = READ_ONCE(sqe->addr);
- ma->len = READ_ONCE(sqe->len);
+ ma->len = READ_ONCE(sqe->off);
+ if (!ma->len)
+ ma->len = READ_ONCE(sqe->len);
ma->advice = READ_ONCE(sqe->fadvise_advice);
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
@@ -78,11 +80,13 @@ int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_fadvise *fa = io_kiocb_to_cmd(req, struct io_fadvise);
- if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
fa->offset = READ_ONCE(sqe->off);
- fa->len = READ_ONCE(sqe->len);
+ fa->len = READ_ONCE(sqe->addr);
+ if (!fa->len)
+ fa->len = READ_ONCE(sqe->len);
fa->advice = READ_ONCE(sqe->fadvise_advice);
if (io_fadvise_force_async(fa))
req->flags |= REQ_F_FORCE_ASYNC;
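
A hedged raw-SQE sketch of a >4GB madvise made possible by the wider length; the 64-bit length goes in sqe->off, and the kernel falls back to the 32-bit sqe->len only when sqe->off is zero:

memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_MADVISE;
sqe->addr = (unsigned long)region;
sqe->off = region_len;			/* full 64-bit length */
sqe->fadvise_advice = MADV_DONTNEED;
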
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
new file mode 100644
index 000000000000..b9384503a2b7
--- /dev/null
+++ b/io_uring/eventfd.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/eventfd.h>
+#include <linux/eventpoll.h>
+#include <linux/io_uring.h>
+#include <linux/io_uring_types.h>
+
+#include "io-wq.h"
+#include "eventfd.h"
+
+struct io_ev_fd {
+ struct eventfd_ctx *cq_ev_fd;
+ unsigned int eventfd_async: 1;
+ struct rcu_head rcu;
+ atomic_t refs;
+ atomic_t ops;
+};
+
+enum {
+ IO_EVENTFD_OP_SIGNAL_BIT,
+};
+
+static void io_eventfd_free(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_ctx_put(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+}
+
+static void io_eventfd_do_signal(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+
+ if (atomic_dec_and_test(&ev_fd->refs))
+ io_eventfd_free(rcu);
+}
+
+void io_eventfd_signal(struct io_ring_ctx *ctx)
+{
+ struct io_ev_fd *ev_fd = NULL;
+
+ if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+ return;
+
+ guard(rcu)();
+
+ /*
+ * rcu_dereference ctx->io_ev_fd once and use it both for checking
+ * and for eventfd_signal
+ */
+ ev_fd = rcu_dereference(ctx->io_ev_fd);
+
+ /*
+ * Check again if ev_fd exists in case an io_eventfd_unregister call
+ * completed between the NULL check of ctx->io_ev_fd at the start of
+ * the function and rcu_read_lock.
+ */
+ if (unlikely(!ev_fd))
+ return;
+ if (!atomic_inc_not_zero(&ev_fd->refs))
+ return;
+ if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+ goto out;
+
+ if (likely(eventfd_signal_allowed())) {
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+ } else {
+ if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+ call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+ return;
+ }
+ }
+out:
+ if (atomic_dec_and_test(&ev_fd->refs))
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
+void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
+{
+ bool skip;
+
+ spin_lock(&ctx->completion_lock);
+
+ /*
+ * Eventfd should only get triggered when at least one event has been
+ * posted. Some applications rely on the eventfd notification count
+ * only changing IFF a new CQE has been added to the CQ ring. There's
+ * no dependency on a 1:1 relationship between how many times this
+ * function is called (and hence the eventfd count) and the number of
+ * CQEs posted to the CQ ring.
+ */
+ skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+ if (skip)
+ return;
+
+ io_eventfd_signal(ctx);
+}
+
+int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned int eventfd_async)
+{
+ struct io_ev_fd *ev_fd;
+ __s32 __user *fds = arg;
+ int fd;
+
+ ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+ lockdep_is_held(&ctx->uring_lock));
+ if (ev_fd)
+ return -EBUSY;
+
+ if (copy_from_user(&fd, fds, sizeof(*fds)))
+ return -EFAULT;
+
+ ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
+ if (!ev_fd)
+ return -ENOMEM;
+
+ ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
+ if (IS_ERR(ev_fd->cq_ev_fd)) {
+ int ret = PTR_ERR(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+ return ret;
+ }
+
+ spin_lock(&ctx->completion_lock);
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+
+ ev_fd->eventfd_async = eventfd_async;
+ ctx->has_evfd = true;
+ atomic_set(&ev_fd->refs, 1);
+ atomic_set(&ev_fd->ops, 0);
+ rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
+ return 0;
+}
+
+int io_eventfd_unregister(struct io_ring_ctx *ctx)
+{
+ struct io_ev_fd *ev_fd;
+
+ ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+ lockdep_is_held(&ctx->uring_lock));
+ if (ev_fd) {
+ ctx->has_evfd = false;
+ rcu_assign_pointer(ctx->io_ev_fd, NULL);
+ if (atomic_dec_and_test(&ev_fd->refs))
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
+ return 0;
+ }
+
+ return -ENXIO;
+}
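
A hedged userspace sketch of the registration path this file now backs:

int efd = eventfd(0, EFD_CLOEXEC);
syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_EVENTFD, &efd, 1);
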
diff --git a/io_uring/eventfd.h b/io_uring/eventfd.h
new file mode 100644
index 000000000000..d394f49c6321
--- /dev/null
+++ b/io_uring/eventfd.h
@@ -0,0 +1,8 @@
+
+struct io_ring_ctx;
+int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned int eventfd_async);
+int io_eventfd_unregister(struct io_ring_ctx *ctx);
+
+void io_eventfd_flush_signal(struct io_ring_ctx *ctx);
+void io_eventfd_signal(struct io_ring_ctx *ctx);
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 7d3316fe9bfc..f1e7c670add8 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -23,6 +23,7 @@
#include "io_uring.h"
#define WORKER_IDLE_TIMEOUT (5 * HZ)
+#define WORKER_INIT_LIMIT 3
enum {
IO_WORKER_F_UP = 0, /* up and active */
@@ -58,6 +59,7 @@ struct io_worker {
unsigned long create_state;
struct callback_head create_work;
+ int init_retries;
union {
struct rcu_head rcu;
@@ -159,7 +161,7 @@ static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
struct io_wq_work *work)
{
- return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND));
+ return io_get_acct(wq, !(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND));
}
static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
@@ -451,7 +453,7 @@ static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker)
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
{
- return work->flags >> IO_WQ_HASH_SHIFT;
+ return atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT;
}
static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
@@ -592,8 +594,9 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
next_hashed = wq_next_work(work);
- if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND))
- work->flags |= IO_WQ_WORK_CANCEL;
+ if (do_kill &&
+ (atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND))
+ atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
wq->do_work(work);
io_assign_current_work(worker, NULL);
@@ -744,7 +747,7 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
return true;
}
-static inline bool io_should_retry_thread(long err)
+static inline bool io_should_retry_thread(struct io_worker *worker, long err)
{
/*
* Prevent perpetual task_work retry, if the task (or its group) is
@@ -752,6 +755,8 @@ static inline bool io_should_retry_thread(long err)
*/
if (fatal_signal_pending(current))
return false;
+ if (worker->init_retries++ >= WORKER_INIT_LIMIT)
+ return false;
switch (err) {
case -EAGAIN:
@@ -778,7 +783,7 @@ static void create_worker_cont(struct callback_head *cb)
io_init_new_worker(wq, worker, tsk);
io_worker_release(worker);
return;
- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
struct io_wq_acct *acct = io_wq_get_acct(worker);
atomic_dec(&acct->nr_running);
@@ -845,7 +850,7 @@ fail:
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
io_init_new_worker(wq, worker, tsk);
- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ } else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
kfree(worker);
goto fail;
} else {
@@ -891,7 +896,7 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
do {
- work->flags |= IO_WQ_WORK_CANCEL;
+ atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
wq->do_work(work);
work = wq->free_work(work);
} while (work);
@@ -926,7 +931,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
{
struct io_wq_acct *acct = io_work_get_acct(wq, work);
- unsigned long work_flags = work->flags;
+ unsigned int work_flags = atomic_read(&work->flags);
struct io_cb_cancel_data match = {
.fn = io_wq_work_match_item,
.data = work,
@@ -939,7 +944,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
* been marked as one that should not get executed, cancel it here.
*/
if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
- (work->flags & IO_WQ_WORK_CANCEL)) {
+ (work_flags & IO_WQ_WORK_CANCEL)) {
io_run_cancel(work, wq);
return;
}
@@ -982,7 +987,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
unsigned int bit;
bit = hash_ptr(val, IO_WQ_HASH_ORDER);
- work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
+ atomic_or(IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT), &work->flags);
}
static bool __io_wq_worker_cancel(struct io_worker *worker,
@@ -990,7 +995,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
struct io_wq_work *work)
{
if (work && match->fn(work, match->data)) {
- work->flags |= IO_WQ_WORK_CANCEL;
+ atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
__set_notify_signal(worker->task);
return true;
}
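
The flag conversions above exist because work->flags can be set from cancellation paths while a worker concurrently reads it, so plain read-modify-write on the field would race. A minimal userspace analogue of the pattern using C11 atomics (names are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define WORK_CANCEL	(1u << 0)
#define WORK_HASHED	(1u << 1)

struct work { atomic_uint flags; };

int main(void)
{
	struct work w = { .flags = 0 };

	/* cancellation path: set a bit without holding a lock */
	atomic_fetch_or(&w.flags, WORK_CANCEL);

	/* worker path: snapshot the flags once, then test the copy */
	unsigned int flags = atomic_load(&w.flags);
	if (flags & WORK_CANCEL)
		printf("cancelled\n");
	return 0;
}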
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index 2b2a6406dd8e..b3b004a7b625 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -56,7 +56,7 @@ bool io_wq_worker_stopped(void);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
- return work->flags & IO_WQ_WORK_HASHED;
+ return atomic_read(&work->flags) & IO_WQ_WORK_HASHED;
}
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 154b25b8a613..8e6faa942a6f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -95,12 +95,14 @@
#include "futex.h"
#include "napi.h"
#include "uring_cmd.h"
+#include "msg_ring.h"
#include "memmap.h"
#include "timeout.h"
#include "poll.h"
#include "rw.h"
#include "alloc_cache.h"
+#include "eventfd.h"
#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
@@ -314,6 +316,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
sizeof(struct io_async_rw));
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct uring_cache));
+ spin_lock_init(&ctx->msg_lock);
+ ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
+ sizeof(struct io_kiocb));
ret |= io_futex_cache_init(ctx);
if (ret)
goto err;
@@ -350,6 +355,7 @@ err:
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
io_alloc_cache_free(&ctx->uring_cache, kfree);
+ io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
io_futex_cache_free(ctx);
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
@@ -461,9 +467,9 @@ static void io_prep_async_work(struct io_kiocb *req)
}
req->work.list.next = NULL;
- req->work.flags = 0;
+ atomic_set(&req->work.flags, 0);
if (req->flags & REQ_F_FORCE_ASYNC)
- req->work.flags |= IO_WQ_WORK_CONCURRENT;
+ atomic_or(IO_WQ_WORK_CONCURRENT, &req->work.flags);
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
req->flags |= io_file_get_flags(req->file);
@@ -479,7 +485,7 @@ static void io_prep_async_work(struct io_kiocb *req)
io_wq_hash_work(&req->work, file_inode(req->file));
} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
if (def->unbound_nonreg_file)
- req->work.flags |= IO_WQ_WORK_UNBOUND;
+ atomic_or(IO_WQ_WORK_UNBOUND, &req->work.flags);
}
}
@@ -519,7 +525,7 @@ static void io_queue_iowq(struct io_kiocb *req)
* worker for it).
*/
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
- req->work.flags |= IO_WQ_WORK_CANCEL;
+ atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);
trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
io_wq_enqueue(tctx->io_wq, &req->work);
@@ -541,84 +547,6 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
}
}
-void io_eventfd_ops(struct rcu_head *rcu)
-{
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
- int ops = atomic_xchg(&ev_fd->ops, 0);
-
- if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
-
- /* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
- * ordering in a race but if references are 0 we know we have to free
- * it regardless.
- */
- if (atomic_dec_and_test(&ev_fd->refs)) {
- eventfd_ctx_put(ev_fd->cq_ev_fd);
- kfree(ev_fd);
- }
-}
-
-static void io_eventfd_signal(struct io_ring_ctx *ctx)
-{
- struct io_ev_fd *ev_fd = NULL;
-
- rcu_read_lock();
- /*
- * rcu_dereference ctx->io_ev_fd once and use it for both for checking
- * and eventfd_signal
- */
- ev_fd = rcu_dereference(ctx->io_ev_fd);
-
- /*
- * Check again if ev_fd exists incase an io_eventfd_unregister call
- * completed between the NULL check of ctx->io_ev_fd at the start of
- * the function and rcu_read_lock.
- */
- if (unlikely(!ev_fd))
- goto out;
- if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
- goto out;
- if (ev_fd->eventfd_async && !io_wq_current_is_worker())
- goto out;
-
- if (likely(eventfd_signal_allowed())) {
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
- } else {
- atomic_inc(&ev_fd->refs);
- if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
- call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops);
- else
- atomic_dec(&ev_fd->refs);
- }
-
-out:
- rcu_read_unlock();
-}
-
-static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-{
- bool skip;
-
- spin_lock(&ctx->completion_lock);
-
- /*
- * Eventfd should only get triggered when at least one event has been
- * posted. Some applications rely on the eventfd notification count
- * only changing IFF a new CQE has been added to the CQ ring. There's
- * no depedency on 1:1 relationship between how many times this
- * function is called (and hence the eventfd count) and number of CQEs
- * posted to the CQ ring.
- */
- skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
- if (skip)
- return;
-
- io_eventfd_signal(ctx);
-}
-
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
if (ctx->poll_activated)
@@ -878,20 +806,43 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
return false;
}
-bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
+static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res,
+ u32 cflags)
{
bool filled;
- io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
if (!filled)
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+ return filled;
+}
+
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
+{
+ bool filled;
+
+ io_cq_lock(ctx);
+ filled = __io_post_aux_cqe(ctx, user_data, res, cflags);
io_cq_unlock_post(ctx);
return filled;
}
/*
+ * Must be called from inline task_work so we know a flush will happen later,
+ * and obviously with ctx->uring_lock held (tw always has that).
+ */
+void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
+{
+ if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
+ spin_lock(&ctx->completion_lock);
+ io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+ spin_unlock(&ctx->completion_lock);
+ }
+ ctx->submit_state.cq_flush = true;
+}
+
+/*
* A helper for multishot requests posting additional CQEs.
* Should only be used from a task_work including IO_URING_F_MULTISHOT.
*/
@@ -1175,9 +1126,10 @@ void tctx_task_work(struct callback_head *cb)
WARN_ON_ONCE(ret);
}
-static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
+static inline void io_req_local_work_add(struct io_kiocb *req,
+ struct io_ring_ctx *ctx,
+ unsigned flags)
{
- struct io_ring_ctx *ctx = req->ctx;
unsigned nr_wait, nr_tw, nr_tw_prev;
struct llist_node *head;
@@ -1191,6 +1143,8 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
flags &= ~IOU_F_TWQ_LAZY_WAKE;
+ guard(rcu)();
+
head = READ_ONCE(ctx->work_llist.first);
do {
nr_tw_prev = 0;
@@ -1259,8 +1213,8 @@ static void io_req_normal_work_add(struct io_kiocb *req)
if (ctx->flags & IORING_SETUP_SQPOLL) {
struct io_sq_data *sqd = ctx->sq_data;
- if (wq_has_sleeper(&sqd->wait))
- wake_up(&sqd->wait);
+ if (sqd->thread)
+ __set_notify_signal(sqd->thread);
return;
}
@@ -1272,13 +1226,18 @@ static void io_req_normal_work_add(struct io_kiocb *req)
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
{
- if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
- rcu_read_lock();
- io_req_local_work_add(req, flags);
- rcu_read_unlock();
- } else {
+ if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ io_req_local_work_add(req, req->ctx, flags);
+ else
io_req_normal_work_add(req);
- }
+}
+
+void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
+ unsigned flags)
+{
+ if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
+ return;
+ io_req_local_work_add(req, ctx, flags);
}
static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
@@ -1467,7 +1426,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
}
__io_cq_unlock_post(ctx);
- if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
+ if (!wq_list_empty(&state->compl_reqs)) {
io_free_batch_list(ctx, state->compl_reqs.first);
INIT_WQ_LIST(&state->compl_reqs);
}
@@ -1813,14 +1772,14 @@ void io_wq_submit_work(struct io_wq_work *work)
io_arm_ltimeout(req);
/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
- if (work->flags & IO_WQ_WORK_CANCEL) {
+ if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
fail:
io_req_task_queue_fail(req, err);
return;
}
if (!io_assign_file(req, def, issue_flags)) {
err = -EBADF;
- work->flags |= IO_WQ_WORK_CANCEL;
+ atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
goto fail;
}
@@ -2649,6 +2608,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
io_alloc_cache_free(&ctx->uring_cache, kfree);
+ io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
io_futex_cache_free(ctx);
io_destroy_buffers(ctx);
mutex_unlock(&ctx->uring_lock);
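
io_req_task_work_add_remote() above is only meaningful for rings created with IORING_SETUP_DEFER_TASKRUN, which in turn requires IORING_SETUP_SINGLE_ISSUER. A minimal liburing sketch of creating such a ring:

#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	/* with DEFER_TASKRUN, completion task_work only runs when the
	 * single submitter enters the kernel to wait, which is the mode
	 * the remote task_work helper above targets */
	int ret = io_uring_queue_init(8, &ring,
				      IORING_SETUP_SINGLE_ISSUER |
				      IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0)
		return 1;
	io_uring_queue_exit(&ring);
	return 0;
}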
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 726e6367af4d..e1ce908f0679 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -65,6 +65,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
+void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
@@ -73,6 +74,8 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
+void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
+ unsigned flags);
bool io_alloc_async_data(struct io_kiocb *req);
void io_req_task_queue(struct io_kiocb *req);
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
@@ -104,12 +107,6 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all);
-enum {
- IO_EVENTFD_OP_SIGNAL_BIT,
- IO_EVENTFD_OP_FREE_BIT,
-};
-
-void io_eventfd_ops(struct rcu_head *rcu);
void io_activate_pollwq(struct io_ring_ctx *ctx);
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 81c4a9d43729..29fa9285a33d 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -11,9 +11,9 @@
#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
+#include "alloc_cache.h"
#include "msg_ring.h"
-
/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
IORING_MSG_RING_FLAGS_PASS)
@@ -68,59 +68,70 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
- if (!target_ctx->task_complete)
- return false;
- return current != target_ctx->submitter_task;
+ return target_ctx->task_complete;
}
-static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
- struct io_ring_ctx *ctx = req->file->private_data;
- struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
- struct task_struct *task = READ_ONCE(ctx->submitter_task);
+ struct io_ring_ctx *ctx = req->ctx;
- if (unlikely(!task))
- return -EOWNERDEAD;
+ io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
+ if (spin_trylock(&ctx->msg_lock)) {
+ if (io_alloc_cache_put(&ctx->msg_cache, req))
+ req = NULL;
+ spin_unlock(&ctx->msg_lock);
+ }
+ if (req)
+ kmem_cache_free(req_cachep, req);
+ percpu_ref_put(&ctx->refs);
+}
- init_task_work(&msg->tw, func);
- if (task_work_add(task, &msg->tw, TWA_SIGNAL))
+static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ int res, u32 cflags, u64 user_data)
+{
+ req->task = READ_ONCE(ctx->submitter_task);
+ if (!req->task) {
+ kmem_cache_free(req_cachep, req);
return -EOWNERDEAD;
+ }
+ req->cqe.user_data = user_data;
+ io_req_set_res(req, res, cflags);
+ percpu_ref_get(&ctx->refs);
+ req->ctx = ctx;
+ req->io_task_work.func = io_msg_tw_complete;
+ io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
+ return 0;
+}
- return IOU_ISSUE_SKIP_COMPLETE;
+static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
+{
+ struct io_kiocb *req = NULL;
+
+ if (spin_trylock(&ctx->msg_lock)) {
+ req = io_alloc_cache_get(&ctx->msg_cache);
+ spin_unlock(&ctx->msg_lock);
+ }
+ if (req)
+ return req;
+ return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN);
}
-static void io_msg_tw_complete(struct callback_head *head)
+static int io_msg_data_remote(struct io_kiocb *req)
{
- struct io_msg *msg = container_of(head, struct io_msg, tw);
- struct io_kiocb *req = cmd_to_io_kiocb(msg);
struct io_ring_ctx *target_ctx = req->file->private_data;
- int ret = 0;
-
- if (current->flags & PF_EXITING) {
- ret = -EOWNERDEAD;
- } else {
- u32 flags = 0;
-
- if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
- flags = msg->cqe_flags;
-
- /*
- * If the target ring is using IOPOLL mode, then we need to be
- * holding the uring_lock for posting completions. Other ring
- * types rely on the regular completion locking, which is
- * handled while posting.
- */
- if (target_ctx->flags & IORING_SETUP_IOPOLL)
- mutex_lock(&target_ctx->uring_lock);
- if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
- ret = -EOVERFLOW;
- if (target_ctx->flags & IORING_SETUP_IOPOLL)
- mutex_unlock(&target_ctx->uring_lock);
- }
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ struct io_kiocb *target;
+ u32 flags = 0;
- if (ret < 0)
- req_set_fail(req);
- io_req_queue_tw_complete(req, ret);
+ target = io_msg_get_kiocb(req->ctx);
+ if (unlikely(!target))
+ return -ENOMEM;
+
+ if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
+ flags = msg->cqe_flags;
+
+ return io_msg_remote_post(target_ctx, target, msg->len, flags,
+ msg->user_data);
}
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
@@ -138,7 +149,7 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
return -EBADFD;
if (io_msg_need_remote(target_ctx))
- return io_msg_exec_remote(req, io_msg_tw_complete);
+ return io_msg_data_remote(req);
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
flags = msg->cqe_flags;
@@ -218,6 +229,22 @@ static void io_msg_tw_fd_complete(struct callback_head *head)
io_req_queue_tw_complete(req, ret);
}
+static int io_msg_fd_remote(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ struct task_struct *task = READ_ONCE(ctx->submitter_task);
+
+ if (unlikely(!task))
+ return -EOWNERDEAD;
+
+ init_task_work(&msg->tw, io_msg_tw_fd_complete);
+ if (task_work_add(task, &msg->tw, TWA_SIGNAL))
+ return -EOWNERDEAD;
+
+ return IOU_ISSUE_SKIP_COMPLETE;
+}
+
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -240,7 +267,7 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
}
if (io_msg_need_remote(target_ctx))
- return io_msg_exec_remote(req, io_msg_tw_fd_complete);
+ return io_msg_fd_remote(req);
return io_msg_install_complete(req, issue_flags);
}
@@ -294,3 +321,10 @@ done:
io_req_set_res(req, ret, 0);
return IOU_OK;
}
+
+void io_msg_cache_free(const void *entry)
+{
+ struct io_kiocb *req = (struct io_kiocb *) entry;
+
+ kmem_cache_free(req_cachep, req);
+}
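
The remote-posting rework above is exercised by IORING_OP_MSG_RING. A rough liburing sketch that posts a CQE from one ring into another (both rings owned by this task; error handling elided):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring src, dst;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	io_uring_queue_init(8, &src, 0);
	io_uring_queue_init(8, &dst, 0);

	sqe = io_uring_get_sqe(&src);
	/* posts res=0x1234, user_data=0xcafef00d into dst's CQ ring;
	 * the source ring also gets a CQE for the MSG_RING op itself */
	io_uring_prep_msg_ring(sqe, dst.ring_fd, 0x1234, 0xcafef00d, 0);
	io_uring_submit(&src);

	io_uring_wait_cqe(&dst, &cqe);
	printf("res=%d user_data=0x%llx\n", cqe->res,
	       (unsigned long long) cqe->user_data);
	io_uring_cqe_seen(&dst, cqe);

	io_uring_queue_exit(&src);
	io_uring_queue_exit(&dst);
	return 0;
}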
diff --git a/io_uring/msg_ring.h b/io_uring/msg_ring.h
index 3987ee6c0e5f..3030f3942f0f 100644
--- a/io_uring/msg_ring.h
+++ b/io_uring/msg_ring.h
@@ -3,3 +3,4 @@
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags);
void io_msg_ring_cleanup(struct io_kiocb *req);
+void io_msg_cache_free(const void *entry);
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 8c18ede595c4..762254a7ff3f 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -283,7 +283,7 @@ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iow
s64 poll_to_ns = timespec64_to_ns(ts);
if (poll_to_ns > 0) {
u64 val = poll_to_ns + 999;
- do_div(val, (s64) 1000);
+ do_div(val, 1000);
poll_to = val;
}
}
diff --git a/io_uring/net.c b/io_uring/net.c
index 7c98c4d50946..7b75da2e7826 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -51,6 +51,16 @@ struct io_connect {
bool seen_econnaborted;
};
+struct io_bind {
+ struct file *file;
+ int addr_len;
+};
+
+struct io_listen {
+ struct file *file;
+ int backlog;
+};
+
struct io_sr_msg {
struct file *file;
union {
@@ -817,20 +827,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
bool mshot_finished, unsigned issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- unsigned int cflags;
-
- if (sr->flags & IORING_RECVSEND_BUNDLE)
- cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
- issue_flags);
- else
- cflags = io_put_kbuf(req, issue_flags);
+ unsigned int cflags = 0;
if (kmsg->msg.msg_inq > 0)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- /* bundle with no more immediate buffers, we're done */
- if (sr->flags & IORING_RECVSEND_BUNDLE && req->flags & REQ_F_BL_EMPTY)
- goto finish;
+ if (sr->flags & IORING_RECVSEND_BUNDLE) {
+ cflags |= io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
+ issue_flags);
+ /* bundle with no more immediate buffers, we're done */
+ if (req->flags & REQ_F_BL_EMPTY)
+ goto finish;
+ } else {
+ cflags |= io_put_kbuf(req, issue_flags);
+ }
/*
* Fill CQE for this receive and see if we should keep trying to
@@ -1127,16 +1137,18 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
flags |= MSG_DONTWAIT;
retry_multishot:
- kmsg->msg.msg_inq = -1;
- kmsg->msg.msg_flags = 0;
-
if (io_do_buffer_select(req)) {
ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ kmsg->msg.msg_inq = -1;
goto out_free;
+ }
sr->buf = NULL;
}
+ kmsg->msg.msg_flags = 0;
+ kmsg->msg.msg_inq = -1;
+
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
@@ -1715,6 +1727,70 @@ out:
return IOU_OK;
}
+int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
+ struct sockaddr __user *uaddr;
+ struct io_async_msghdr *io;
+
+ if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
+ return -EINVAL;
+
+ uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ bind->addr_len = READ_ONCE(sqe->addr2);
+
+ io = io_msg_alloc_async(req);
+ if (unlikely(!io))
+ return -ENOMEM;
+ return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
+}
+
+int io_bind(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
+ struct io_async_msghdr *io = req->async_data;
+ struct socket *sock;
+ int ret;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return 0;
+}
+
+int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
+
+ if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
+ return -EINVAL;
+
+ listen->backlog = READ_ONCE(sqe->len);
+ return 0;
+}
+
+int io_listen(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
+ struct socket *sock;
+ int ret;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ ret = __sys_listen_socket(sock, listen->backlog);
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return 0;
+}
+
void io_netmsg_cache_free(const void *entry)
{
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
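
The new bind/listen opcodes above can be driven with liburing, assuming a version recent enough to ship io_uring_prep_bind()/io_uring_prep_listen() (2.8 or newer at the time of writing; treat the exact version as an assumption). A rough sketch:

#include <liburing.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct sockaddr_in addr;
	int fd, i;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;
	fd = socket(AF_INET, SOCK_STREAM, 0);

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8080);
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_bind(sqe, fd, (struct sockaddr *) &addr, sizeof(addr));
	sqe->flags |= IOSQE_IO_LINK;	/* listen only runs if bind succeeds */

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_listen(sqe, fd, 128);

	io_uring_submit_and_wait(&ring, 2);
	for (i = 0; i < 2; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		printf("res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}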
diff --git a/io_uring/net.h b/io_uring/net.h
index 0eb1c1920fc9..52bfee05f06a 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -49,6 +49,12 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags);
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
void io_send_zc_cleanup(struct io_kiocb *req);
+int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_bind(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_listen(struct io_kiocb *req, unsigned int issue_flags);
+
void io_netmsg_cache_free(const void *entry);
#else
static inline void io_netmsg_cache_free(const void *entry)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 2e3b7b16effb..a2be3bbca5ff 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -495,6 +495,26 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_ftruncate_prep,
.issue = io_ftruncate,
},
+ [IORING_OP_BIND] = {
+#if defined(CONFIG_NET)
+ .needs_file = 1,
+ .prep = io_bind_prep,
+ .issue = io_bind,
+ .async_size = sizeof(struct io_async_msghdr),
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
+ },
+ [IORING_OP_LISTEN] = {
+#if defined(CONFIG_NET)
+ .needs_file = 1,
+ .prep = io_listen_prep,
+ .issue = io_listen,
+ .async_size = sizeof(struct io_async_msghdr),
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -716,6 +736,12 @@ const struct io_cold_def io_cold_defs[] = {
[IORING_OP_FTRUNCATE] = {
.name = "FTRUNCATE",
},
+ [IORING_OP_BIND] = {
+ .name = "BIND",
+ },
+ [IORING_OP_LISTEN] = {
+ .name = "LISTEN",
+ },
};
const char *io_uring_get_opcode(u8 opcode)
@@ -725,6 +751,14 @@ const char *io_uring_get_opcode(u8 opcode)
return "INVALID";
}
+bool io_uring_op_supported(u8 opcode)
+{
+ if (opcode < IORING_OP_LAST &&
+ io_issue_defs[opcode].prep != io_eopnotsupp_prep)
+ return true;
+ return false;
+}
+
void __init io_uring_optable_init(void)
{
int i;
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index 7ee6f5aa90aa..14456436ff74 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -17,8 +17,6 @@ struct io_issue_def {
unsigned poll_exclusive : 1;
/* op supports buffer selection */
unsigned buffer_select : 1;
- /* opcode is not supported by this kernel */
- unsigned not_supported : 1;
/* skip auditing */
unsigned audit_skip : 1;
/* supports ioprio */
@@ -47,5 +45,7 @@ struct io_cold_def {
extern const struct io_issue_def io_issue_defs[];
extern const struct io_cold_def io_cold_defs[];
+bool io_uring_op_supported(u8 opcode);
+
void io_uring_optable_init(void);
#endif
diff --git a/io_uring/register.c b/io_uring/register.c
index c0010a66a6f2..e3c20be5a198 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -27,65 +27,11 @@
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
+#include "eventfd.h"
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
-static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
- unsigned int eventfd_async)
-{
- struct io_ev_fd *ev_fd;
- __s32 __user *fds = arg;
- int fd;
-
- ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
- lockdep_is_held(&ctx->uring_lock));
- if (ev_fd)
- return -EBUSY;
-
- if (copy_from_user(&fd, fds, sizeof(*fds)))
- return -EFAULT;
-
- ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
- if (!ev_fd)
- return -ENOMEM;
-
- ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
- if (IS_ERR(ev_fd->cq_ev_fd)) {
- int ret = PTR_ERR(ev_fd->cq_ev_fd);
- kfree(ev_fd);
- return ret;
- }
-
- spin_lock(&ctx->completion_lock);
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
-
- ev_fd->eventfd_async = eventfd_async;
- ctx->has_evfd = true;
- rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
- atomic_set(&ev_fd->refs, 1);
- atomic_set(&ev_fd->ops, 0);
- return 0;
-}
-
-int io_eventfd_unregister(struct io_ring_ctx *ctx)
-{
- struct io_ev_fd *ev_fd;
-
- ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
- lockdep_is_held(&ctx->uring_lock));
- if (ev_fd) {
- ctx->has_evfd = false;
- rcu_assign_pointer(ctx->io_ev_fd, NULL);
- if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops))
- call_rcu(&ev_fd->rcu, io_eventfd_ops);
- return 0;
- }
-
- return -ENXIO;
-}
-
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
@@ -93,9 +39,10 @@ static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
size_t size;
int i, ret;
+ if (nr_args > IORING_OP_LAST)
+ nr_args = IORING_OP_LAST;
+
size = struct_size(p, ops, nr_args);
- if (size == SIZE_MAX)
- return -EOVERFLOW;
p = kzalloc(size, GFP_KERNEL);
if (!p)
return -ENOMEM;
@@ -108,12 +55,10 @@ static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
goto out;
p->last_op = IORING_OP_LAST - 1;
- if (nr_args > IORING_OP_LAST)
- nr_args = IORING_OP_LAST;
for (i = 0; i < nr_args; i++) {
p->ops[i].op = i;
- if (!io_issue_defs[i].not_supported)
+ if (io_uring_op_supported(i))
p->ops[i].flags = IO_URING_OP_SUPPORTED;
}
p->ops_len = i;
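
The not_supported flag removal is invisible to applications: opcode support is still reported through IORING_REGISTER_PROBE. A small liburing sketch that queries it (referencing IORING_OP_BIND assumes new-enough uapi headers):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring_probe *probe = io_uring_get_probe();

	if (!probe)
		return 1;
	printf("IORING_OP_BIND supported: %d\n",
	       io_uring_opcode_supported(probe, IORING_OP_BIND));
	io_uring_free_probe(probe);
	return 0;
}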
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index edb9c5baf2e2..a860516bf448 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -85,31 +85,6 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
return 0;
}
-static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
- void __user *arg, unsigned index)
-{
- struct iovec __user *src;
-
-#ifdef CONFIG_COMPAT
- if (ctx->compat) {
- struct compat_iovec __user *ciovs;
- struct compat_iovec ciov;
-
- ciovs = (struct compat_iovec __user *) arg;
- if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
- return -EFAULT;
-
- dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
- dst->iov_len = ciov.iov_len;
- return 0;
- }
-#endif
- src = (struct iovec __user *) arg;
- if (copy_from_user(dst, &src[index], sizeof(*dst)))
- return -EFAULT;
- return 0;
-}
-
static int io_buffer_validate(struct iovec *iov)
{
unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
@@ -249,7 +224,7 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
ret = io_run_task_work_sig(ctx);
if (ret < 0) {
- __set_current_state(TASK_RUNNING);
+ finish_wait(&ctx->rsrc_quiesce_wq, &we);
mutex_lock(&ctx->uring_lock);
if (list_empty(&ctx->rsrc_ref_list))
ret = 0;
@@ -257,7 +232,6 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
}
schedule();
- __set_current_state(TASK_RUNNING);
mutex_lock(&ctx->uring_lock);
ret = 0;
} while (!list_empty(&ctx->rsrc_ref_list));
@@ -420,8 +394,9 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned int nr_args)
{
+ struct iovec __user *uvec = u64_to_user_ptr(up->data);
u64 __user *tags = u64_to_user_ptr(up->tags);
- struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
+ struct iovec fast_iov, *iov;
struct page *last_hpage = NULL;
__u32 done;
int i, err;
@@ -435,21 +410,23 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
struct io_mapped_ubuf *imu;
u64 tag = 0;
- err = io_copy_iov(ctx, &iov, iovs, done);
- if (err)
+ iov = iovec_from_user(&uvec[done], 1, 1, &fast_iov, ctx->compat);
+ if (IS_ERR(iov)) {
+ err = PTR_ERR(iov);
break;
+ }
if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) {
err = -EFAULT;
break;
}
- err = io_buffer_validate(&iov);
+ err = io_buffer_validate(iov);
if (err)
break;
- if (!iov.iov_base && tag) {
+ if (!iov->iov_base && tag) {
err = -EINVAL;
break;
}
- err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
+ err = io_sqe_buffer_register(ctx, iov, &imu, &last_hpage);
if (err)
break;
@@ -971,8 +948,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
{
struct page *last_hpage = NULL;
struct io_rsrc_data *data;
+ struct iovec fast_iov, *iov = &fast_iov;
+ const struct iovec __user *uvec = (struct iovec __user *) arg;
int i, ret;
- struct iovec iov;
BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
@@ -989,24 +967,27 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
+ if (!arg)
+ memset(iov, 0, sizeof(*iov));
+
for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
if (arg) {
- ret = io_copy_iov(ctx, &iov, arg, i);
- if (ret)
+ iov = iovec_from_user(&uvec[i], 1, 1, &fast_iov, ctx->compat);
+ if (IS_ERR(iov)) {
+ ret = PTR_ERR(iov);
break;
- ret = io_buffer_validate(&iov);
+ }
+ ret = io_buffer_validate(iov);
if (ret)
break;
- } else {
- memset(&iov, 0, sizeof(iov));
}
- if (!iov.iov_base && *io_get_tag_slot(data, i)) {
+ if (!iov->iov_base && *io_get_tag_slot(data, i)) {
ret = -EINVAL;
break;
}
- ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
+ ret = io_sqe_buffer_register(ctx, iov, &ctx->user_bufs[i],
&last_hpage);
if (ret)
break;
@@ -1068,7 +1049,6 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
* branch doesn't expect non PAGE_SIZE'd chunks.
*/
iter->bvec = bvec;
- iter->nr_segs = bvec->bv_len;
iter->count -= offset;
iter->iov_offset = offset;
} else {
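
With io_copy_iov() gone, registered-buffer iovecs are imported through the generic iovec_from_user(), which handles compat layouts internally. Nothing changes for userspace; a minimal liburing sketch of the path the hunks above rework:

#include <liburing.h>
#include <stdlib.h>
#include <sys/uio.h>

int main(void)
{
	struct io_uring ring;
	struct iovec iov;
	int ret;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;
	iov.iov_base = malloc(4096);
	iov.iov_len = 4096;

	/* ends up in io_sqe_buffers_register() above */
	ret = io_uring_register_buffers(&ring, &iov, 1);

	io_uring_queue_exit(&ring);
	return ret ? 1 : 0;
}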
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1a2128459cb4..c004d21e2f12 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -772,7 +772,7 @@ static bool need_complete_io(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
-static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
+static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
struct kiocb *kiocb = &rw->kiocb;
@@ -787,7 +787,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
req->flags |= io_file_get_flags(file);
kiocb->ki_flags = file->f_iocb_flags;
- ret = kiocb_set_rw_flags(kiocb, rw->flags);
+ ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
if (unlikely(ret))
return ret;
kiocb->ki_flags |= IOCB_ALLOC_CACHE;
@@ -832,8 +832,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret < 0))
return ret;
}
-
- ret = io_rw_init_file(req, FMODE_READ);
+ ret = io_rw_init_file(req, FMODE_READ, READ);
if (unlikely(ret))
return ret;
req->cqe.res = iov_iter_count(&io->iter);
@@ -1013,7 +1012,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
ssize_t ret, ret2;
loff_t *ppos;
- ret = io_rw_init_file(req, FMODE_WRITE);
+ ret = io_rw_init_file(req, FMODE_WRITE, WRITE);
if (unlikely(ret))
return ret;
req->cqe.res = iov_iter_count(&io->iter);
diff --git a/io_uring/statx.c b/io_uring/statx.c
index abb874209caa..f7f9b202eec0 100644
--- a/io_uring/statx.c
+++ b/io_uring/statx.c
@@ -37,8 +37,7 @@ int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sx->flags = READ_ONCE(sqe->statx_flags);
sx->filename = getname_flags(path,
- getname_statx_lookup_flags(sx->flags),
- NULL);
+ getname_statx_lookup_flags(sx->flags));
if (IS_ERR(sx->filename)) {
int ret = PTR_ERR(sx->filename);
diff --git a/io_uring/xattr.c b/io_uring/xattr.c
index 44905b82eea8..6cf41c3bc369 100644
--- a/io_uring/xattr.c
+++ b/io_uring/xattr.c
@@ -96,7 +96,7 @@ int io_getxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
- ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+ ix->filename = getname_flags(path, LOOKUP_FOLLOW);
if (IS_ERR(ix->filename)) {
ret = PTR_ERR(ix->filename);
ix->filename = NULL;
@@ -189,7 +189,7 @@ int io_setxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
- ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+ ix->filename = getname_flags(path, LOOKUP_FOLLOW);
if (IS_ERR(ix->filename)) {
ret = PTR_ERR(ix->filename);
ix->filename = NULL;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 5eea4dc0509e..a7cbd69efbef 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -903,7 +903,8 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
audit_mq_open(oflag, mode, attr);
- if (IS_ERR(name = getname(u_name)))
+ name = getname(u_name);
+ if (IS_ERR(name))
return PTR_ERR(name);
fd = get_unused_fd_flags(O_CLOEXEC);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index be8c680121e4..d6ef4f4f9cba 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -529,7 +529,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
entry->rule.buflen += f_val;
f->lsm_str = str;
err = security_audit_rule_init(f->type, f->op, str,
- (void **)&f->lsm_rule);
+ (void **)&f->lsm_rule,
+ GFP_KERNEL);
/* Keep currently invalid fields around in case they
* become valid after a policy reload. */
if (err == -EINVAL) {
@@ -799,7 +800,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
/* our own (refreshed) copy of lsm_rule */
ret = security_audit_rule_init(df->type, df->op, df->lsm_str,
- (void **)&df->lsm_rule);
+ (void **)&df->lsm_rule, GFP_KERNEL);
/* Keep currently invalid fields around in case they
* become valid after a policy reload. */
if (ret == -EINVAL) {
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 583ee4fe48ef..e52b3ad231b9 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map)
struct vma_list {
struct vm_area_struct *vma;
struct list_head head;
+ atomic_t mmap_count;
};
static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
vml = kmalloc(sizeof(*vml), GFP_KERNEL);
if (!vml)
return -ENOMEM;
+ atomic_set(&vml->mmap_count, 1);
vma->vm_private_data = vml;
vml->vma = vma;
list_add(&vml->head, &arena->vma_list);
return 0;
}
+static void arena_vm_open(struct vm_area_struct *vma)
+{
+ struct vma_list *vml = vma->vm_private_data;
+
+ atomic_inc(&vml->mmap_count);
+}
+
static void arena_vm_close(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
- struct vma_list *vml;
+ struct vma_list *vml = vma->vm_private_data;
+ if (!atomic_dec_and_test(&vml->mmap_count))
+ return;
guard(mutex)(&arena->lock);
- vml = vma->vm_private_data;
+ /* update linked list under lock */
list_del(&vml->head);
vma->vm_private_data = NULL;
kfree(vml);
@@ -287,6 +298,7 @@ out:
}
static const struct vm_operations_struct arena_vm_ops = {
+ .open = arena_vm_open,
.close = arena_vm_close,
.fault = arena_vm_fault,
};
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 976cb258a0ed..c938dea5ddbf 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -782,8 +782,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
- nbuckets, GFP_USER | __GFP_NOWARN);
+ smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
+ sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
if (!smap->buckets) {
err = -ENOMEM;
goto free_smap;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a6c3faa6e4a..695a0fb2cd4d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -736,11 +736,11 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+int __bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
struct bpf_ksym *ksym;
- char *ret = NULL;
+ int ret = 0;
rcu_read_lock();
ksym = bpf_ksym_find(addr);
@@ -748,9 +748,8 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long symbol_start = ksym->start;
unsigned long symbol_end = ksym->end;
- strscpy(sym, ksym->name, KSYM_NAME_LEN);
+ ret = strscpy(sym, ksym->name, KSYM_NAME_LEN);
- ret = sym;
if (size)
*size = symbol_end - symbol_start;
if (off)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2a69a9a36c0f..3243c83ef3e3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1084,7 +1084,10 @@ struct bpf_async_cb {
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ struct work_struct delete_work;
+ };
u64 flags;
};
@@ -1107,6 +1110,7 @@ struct bpf_async_cb {
struct bpf_hrtimer {
struct bpf_async_cb cb;
struct hrtimer timer;
+ atomic_t cancelling;
};
struct bpf_work {
@@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work)
kfree_rcu(w, cb.rcu);
}
+static void bpf_timer_delete_work(struct work_struct *work)
+{
+ struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+
+ /* Cancel the timer and wait for callback to complete if it was running.
+ * If hrtimer_cancel() can be safely called it's safe to call
+ * kfree_rcu(t) right after for both preallocated and non-preallocated
+ * maps. The async->cb = NULL was already done and no code path can see
+ * address 't' anymore. Any timer armed on this bpf_hrtimer before
+ * bpf_timer_cancel_and_free ran is cancelled by the hrtimer_cancel()
+ * below.
+ */
+ hrtimer_cancel(&t->timer);
+ kfree_rcu(t, cb.rcu);
+}
+
static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
enum bpf_async_type type)
{
@@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
clockid = flags & (MAX_CLOCKS - 1);
t = (struct bpf_hrtimer *)cb;
+ atomic_set(&t->cancelling, 0);
+ INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
t->timer.function = bpf_timer_cb;
cb->value = (void *)async - map->record->timer_off;
@@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async)
BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
{
- struct bpf_hrtimer *t;
+ struct bpf_hrtimer *t, *cur_t;
+ bool inc = false;
int ret = 0;
if (in_nmi())
@@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
ret = -EINVAL;
goto out;
}
- if (this_cpu_read(hrtimer_running) == t) {
+
+ cur_t = this_cpu_read(hrtimer_running);
+ if (cur_t == t) {
/* If bpf callback_fn is trying to bpf_timer_cancel()
* its own timer the hrtimer_cancel() will deadlock
- * since it waits for callback_fn to finish
+ * since it waits for callback_fn to finish.
*/
ret = -EDEADLK;
goto out;
}
+
+ /* Only account in-flight cancellations when invoked from a timer
+ * callback: waiting must be avoided only when other _callbacks_ may be
+ * waiting on us, since that is what can introduce lockups. Non-callback
+ * paths are fine, as nobody synchronously waits for their completion.
+ */
+ if (!cur_t)
+ goto drop;
+ atomic_inc(&t->cancelling);
+ /* Need full barrier after relaxed atomic_inc */
+ smp_mb__after_atomic();
+ inc = true;
+ if (atomic_read(&cur_t->cancelling)) {
+ /* We're cancelling timer t, while some other timer callback is
+ * attempting to cancel us. In such a case, it might be possible
+ * that timer t belongs to the other callback, or some other
+ * callback waiting upon it (creating transitive dependencies
+ * upon us), and we will enter a deadlock if we continue
+ * cancelling and waiting for it synchronously, since it might
+ * do the same. Bail!
+ */
+ ret = -EDEADLK;
+ goto out;
+ }
+drop:
drop_prog_refcnt(&t->cb);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1467,6 +1516,8 @@ out:
* if it was running.
*/
ret = ret ?: hrtimer_cancel(&t->timer);
+ if (inc)
+ atomic_dec(&t->cancelling);
rcu_read_unlock();
return ret;
}
@@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val)
if (!t)
return;
- /* Cancel the timer and wait for callback to complete if it was running.
- * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
- * right after for both preallocated and non-preallocated maps.
- * The async->cb = NULL was already done and no code path can
- * see address 't' anymore.
- *
- * Check that bpf_map_delete/update_elem() wasn't called from timer
- * callback_fn. In such case don't call hrtimer_cancel() (since it will
- * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
- * return -1). Though callback_fn is still running on this cpu it's
+ /* We check that bpf_map_delete/update_elem() was called from timer
+ * callback_fn. In such case we don't call hrtimer_cancel() (since it
+ * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
+ * just return -1). Though callback_fn is still running on this cpu it's
* safe to do kfree(t) because bpf_timer_cb() read everything it needed
* from 't'. The bpf subprog callback_fn won't be able to access 't',
* since async->cb = NULL was already done. The timer will be
* effectively cancelled because bpf_timer_cb() will return
* HRTIMER_NORESTART.
+ *
+ * However, it is possible the timer callback_fn calling us armed the
+ * timer _before_ calling us, such that failing to cancel it here will
+ * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
+ * Therefore, we _need_ to cancel any outstanding timers before we do
+ * kfree_rcu, even though no more timers can be armed.
+ *
+ * Moreover, we need to schedule work even if timer does not belong to
+ * the calling callback_fn, as on two different CPUs, we can end up in a
+ * situation where both sides run in parallel, try to cancel one
+ * another, and we end up waiting on both sides in hrtimer_cancel
+ * without making forward progress, since timer1 depends on timer2
+ * callback to finish, and vice versa.
+ *
+ * CPU 1 (timer1_cb) CPU 2 (timer2_cb)
+ * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1)
+ *
+ * To avoid these issues, punt to workqueue context when we are in a
+ * timer callback.
*/
- if (this_cpu_read(hrtimer_running) != t)
- hrtimer_cancel(&t->timer);
- kfree_rcu(t, cb.rcu);
+ if (this_cpu_read(hrtimer_running))
+ queue_work(system_unbound_wq, &t->cb.delete_work);
+ else
+ bpf_timer_delete_work(&t->cb.delete_work);
}
/* This function is called by map_delete/update_elem for individual element and
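
The -EDEADLK path above is reachable from a BPF program that cancels its own timer inside the timer callback. A rough BPF-side sketch, assuming a libbpf build environment (section name and the literal clockid are illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct elem {
	struct bpf_timer t;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} timers SEC(".maps");

static int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
	/* self-cancel from the callback: fails with -EDEADLK per the
	 * hrtimer_running check above */
	long ret = bpf_timer_cancel(timer);

	bpf_printk("self-cancel: %ld", ret);
	return 0;
}

SEC("tp/syscalls/sys_enter_getpid")
int start_timer(void *ctx)
{
	int key = 0;
	struct elem *e = bpf_map_lookup_elem(&timers, &key);

	if (!e)
		return 0;
	bpf_timer_init(&e->t, &timers, 1 /* CLOCK_MONOTONIC */);
	bpf_timer_set_callback(&e->t, timer_cb);
	bpf_timer_start(&e->t, 0, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";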
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0ee653a936ea..e20b90c36131 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -51,7 +51,8 @@ struct bpf_ringbuf {
* This prevents a user-space application from modifying the
* position and ruining in-kernel tracking. The permissions of the
* pages depend on who is producing samples: user-space or the
- * kernel.
+ * kernel. Note that the pending counter is placed in the same
+ * page as the producer, so that it shares the same cache line.
*
* Kernel-producer
* ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
+ unsigned long pending_pos;
char data[] __aligned(PAGE_SIZE);
};
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb->mask = data_sz - 1;
rb->consumer_pos = 0;
rb->producer_pos = 0;
+ rb->pending_pos = 0;
return rb;
}
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
- unsigned long cons_pos, prod_pos, new_prod_pos, flags;
- u32 len, pg_off;
+ unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
struct bpf_ringbuf_hdr *hdr;
+ u32 len, pg_off, tmp_size, hdr_len;
if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
spin_lock_irqsave(&rb->spinlock, flags);
}
+ pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
new_prod_pos = prod_pos + len;
- /* check for out of ringbuf space by ensuring producer position
- * doesn't advance more than (ringbuf_size - 1) ahead
+ while (pend_pos < prod_pos) {
+ hdr = (void *)rb->data + (pend_pos & rb->mask);
+ hdr_len = READ_ONCE(hdr->len);
+ if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+ break;
+ tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+ tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+ pend_pos += tmp_size;
+ }
+ rb->pending_pos = pend_pos;
+
+ /* check for out of ringbuf space:
+ * - by ensuring producer position doesn't advance more than
+ * (ringbuf_size - 1) ahead
+ * - by ensuring that the span from the oldest not yet committed
+ * record to the newest record does not exceed (ringbuf_size - 1)
*/
- if (new_prod_pos - cons_pos > rb->mask) {
+ if (new_prod_pos - cons_pos > rb->mask ||
+ new_prod_pos - pend_pos > rb->mask) {
spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}
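
From the BPF side, the pending_pos tracking above changes what bpf_ringbuf_reserve() returns when older reservations are still uncommitted: the reserve now fails instead of letting the producer wrap onto busy records. A minimal producer sketch (libbpf build environment assumed):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 4096);
} rb SEC(".maps");

SEC("tp/syscalls/sys_enter_getpid")
int produce(void *ctx)
{
	__u64 *rec = bpf_ringbuf_reserve(&rb, sizeof(*rec), 0);

	/* with the fix above, reserve returns NULL once the span from the
	 * oldest uncommitted record to the new producer position would
	 * exceed the buffer size, rather than overrunning busy records */
	if (!rec)
		return 0;
	*rec = bpf_ktime_get_ns();
	bpf_ringbuf_submit(rec, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";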
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 36ef8e96787e..214a9fa8c6fb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4549,11 +4549,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr.id = 0;
} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
env->bpf_capable) {
- struct bpf_reg_state fake_reg = {};
+ struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
- __mark_reg_known(&fake_reg, insn->imm);
- fake_reg.type = SCALAR_VALUE;
- save_register_state(env, state, spi, &fake_reg, size);
+ memset(tmp_reg, 0, sizeof(*tmp_reg));
+ __mark_reg_known(tmp_reg, insn->imm);
+ tmp_reg->type = SCALAR_VALUE;
+ save_register_state(env, state, spi, tmp_reg, size);
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -6235,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
}
reg->u32_min_value = 0;
reg->u32_max_value = U32_MAX;
+ reg->var_off = tnum_subreg(tnum_unknown);
}
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
@@ -6279,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
reg->s32_max_value = s32_max;
reg->u32_min_value = (u32)s32_min;
reg->u32_max_value = (u32)s32_max;
+ reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
return;
}
@@ -12718,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b)
return res < a;
}
+static bool signed_add16_overflows(s16 a, s16 b)
+{
+ /* Do the add in u16, where overflow is well-defined */
+ s16 res = (s16)((u16)a + (u16)b);
+
+ if (b < 0)
+ return res > a;
+ return res < a;
+}
+
static bool signed_sub_overflows(s64 a, s64 b)
{
/* Do the sub in u64, where overflow is well-defined */
@@ -15113,7 +15126,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
struct bpf_reg_state *eq_branch_regs;
- struct bpf_reg_state fake_reg = {};
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
int pred = -1;
@@ -15179,7 +15191,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
return -EINVAL;
}
- src_reg = &fake_reg;
+ src_reg = &env->fake_reg[0];
+ memset(src_reg, 0, sizeof(*src_reg));
src_reg->type = SCALAR_VALUE;
__mark_reg_known(src_reg, insn->imm);
}
@@ -15239,10 +15252,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
&other_branch_regs[insn->src_reg],
dst_reg, src_reg, opcode, is_jmp32);
} else /* BPF_SRC(insn->code) == BPF_K */ {
+ /* reg_set_min_max() can mangle the fake_reg. Make a copy
+ * so that these are two different memory locations. The
+ * src_reg is not used beyond here in context of K.
+ */
+ memcpy(&env->fake_reg[1], &env->fake_reg[0],
+ sizeof(env->fake_reg[0]));
err = reg_set_min_max(env,
&other_branch_regs[insn->dst_reg],
- src_reg /* fake one */,
- dst_reg, src_reg /* same fake one */,
+ &env->fake_reg[0],
+ dst_reg, &env->fake_reg[1],
opcode, is_jmp32);
}
if (err)
@@ -17441,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
goto skip_inf_loop_check;
}
if (is_may_goto_insn_at(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ if (sl->state.may_goto_depth != cur->may_goto_depth &&
+ states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
update_loop_entry(cur, &sl->state);
goto hit;
}
- goto skip_inf_loop_check;
}
if (calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
@@ -18723,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (signed_add32_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->imm += delta;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (signed_add16_overflows(insn->off, delta))
+ return -ERANGE;
+ insn->off += delta;
+ }
+ }
+ return 0;
+}
+
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
u32 off, u32 cnt)
{
@@ -19997,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
stack_depth_extra = 8;
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
cnt = 4;
@@ -20313,7 +20368,7 @@ patch_map_ops_generic:
goto next_insn;
}
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
/* Implement bpf_get_smp_processor_id() inline. */
if (insn->imm == BPF_FUNC_get_smp_processor_id &&
prog->jit_requested && bpf_jit_supports_percpu_insn()) {
@@ -20539,6 +20594,13 @@ next_insn:
if (!new_prog)
return -ENOMEM;
env->prog = prog = new_prog;
+ /*
+ * If may_goto is the first insn of a prog there could be a jmp
+ * insn that points to it, hence adjust all such jmps to point
+ * to insn after BPF_ST that inits may_goto count.
+ * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+ */
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
}
/* Since poke tab is now finalized, publish aux to tracker. */
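
A standalone harness for the 16-bit overflow guard added above, since the trick of doing the add in the unsigned type is easy to get backwards (userspace sketch, not kernel code):

#include <stdint.h>
#include <stdio.h>

static int signed_add16_overflows(int16_t a, int16_t b)
{
	/* do the add in u16, where overflow is well-defined */
	int16_t res = (int16_t)((uint16_t)a + (uint16_t)b);

	if (b < 0)
		return res > a;
	return res < a;
}

int main(void)
{
	printf("%d\n", signed_add16_overflows(INT16_MAX, 1));	/* prints 1 */
	printf("%d\n", signed_add16_overflows(100, -1));	/* prints 0 */
	return 0;
}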
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1979a9935719..7121f058674a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1859,6 +1859,9 @@ static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return fals
void __init bringup_nonboot_cpus(unsigned int max_cpus)
{
+ if (!max_cpus)
+ return;
+
/* Try parallel bringup optimization if enabled */
if (cpuhp_bringup_cpus_parallel(max_cpus))
return;
@@ -2446,7 +2449,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* The caller needs to hold cpus read locked while calling this function.
* Return:
* On success:
- * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
+ * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN;
* 0 for all other states
* On failure: proper (negative) error code
*/
@@ -2469,7 +2472,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance);
- dynstate = state == CPUHP_AP_ONLINE_DYN;
+ dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN;
if (ret > 0 && dynstate) {
state = ret;
ret = 0;
@@ -2500,8 +2503,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
out:
mutex_unlock(&cpuhp_state_mutex);
/*
- * If the requested state is CPUHP_AP_ONLINE_DYN, return the
- * dynamically allocated state in case of success.
+ * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN,
+ * return the dynamically allocated state in case of success.
*/
if (!ret && dynstate)
return state;
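
With the change above, CPUHP_BP_PREPARE_DYN matches CPUHP_AP_ONLINE_DYN in returning the dynamically allocated state number. A kernel-module sketch of the calling convention (kernel-context code, not userspace-runnable; names are illustrative):

#include <linux/cpuhotplug.h>
#include <linux/module.h>

static int demo_state;

static int demo_prepare(unsigned int cpu)
{
	pr_info("preparing cpu %u\n", cpu);
	return 0;
}

static int __init demo_init(void)
{
	int ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "demo:prepare",
				    demo_prepare, NULL);
	if (ret < 0)
		return ret;
	demo_state = ret;	/* now a positive dynamic state number */
	return 0;
}

static void __exit demo_exit(void)
{
	cpuhp_remove_state(demo_state);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");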
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ retry:
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 99076dbe27d8..763a042eef9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -616,12 +616,6 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
exe_file = get_mm_exe_file(oldmm);
RCU_INIT_POINTER(mm->exe_file, exe_file);
- /*
- * We depend on the oldmm having properly denied write access to the
- * exe_file already.
- */
- if (exe_file && deny_write_access(exe_file))
- pr_warn_once("deny_write_access() failed in %s\n", __func__);
}
#ifdef CONFIG_MMU
@@ -1412,20 +1406,11 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
*/
old_exe_file = rcu_dereference_raw(mm->exe_file);
- if (new_exe_file) {
- /*
- * We expect the caller (i.e., sys_execve) to already denied
- * write access, so this is unlikely to fail.
- */
- if (unlikely(deny_write_access(new_exe_file)))
- return -EACCES;
+ if (new_exe_file)
get_file(new_exe_file);
- }
rcu_assign_pointer(mm->exe_file, new_exe_file);
- if (old_exe_file) {
- allow_write_access(old_exe_file);
+ if (old_exe_file)
fput(old_exe_file);
- }
return 0;
}
@@ -1464,9 +1449,6 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
return ret;
}
- ret = deny_write_access(new_exe_file);
- if (ret)
- return -EACCES;
get_file(new_exe_file);
/* set the new file */
@@ -1475,10 +1457,8 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
rcu_assign_pointer(mm->exe_file, new_exe_file);
mmap_write_unlock(mm);
- if (old_exe_file) {
- allow_write_access(old_exe_file);
+ if (old_exe_file)
fput(old_exe_file);
- }
return 0;
}
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 74a4ef1da9ad..fd75b4a484d7 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 10)
+#if (__GNUC__ >= 14)
+#define GCOV_COUNTERS 9
+#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
#elif (__GNUC__ >= 7)
#define GCOV_COUNTERS 9
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 22ea19a36e6e..98b9622d372e 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -388,12 +388,12 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
!!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
}
-static const char *kallsyms_lookup_buildid(unsigned long addr,
+static int kallsyms_lookup_buildid(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset, char **modname,
const unsigned char **modbuildid, char *namebuf)
{
- const char *ret;
+ int ret;
namebuf[KSYM_NAME_LEN - 1] = 0;
namebuf[0] = 0;
@@ -410,7 +410,7 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
if (modbuildid)
*modbuildid = NULL;
- ret = namebuf;
+ ret = strlen(namebuf);
goto found;
}
@@ -442,8 +442,13 @@ const char *kallsyms_lookup(unsigned long addr,
unsigned long *offset,
char **modname, char *namebuf)
{
- return kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
- NULL, namebuf);
+ int ret = kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
+ NULL, namebuf);
+
+ if (!ret)
+ return NULL;
+
+ return namebuf;
}
int lookup_symbol_name(unsigned long addr, char *symname)
@@ -478,19 +483,15 @@ static int __sprint_symbol(char *buffer, unsigned long address,
{
char *modname;
const unsigned char *buildid;
- const char *name;
unsigned long offset, size;
int len;
address += symbol_offset;
- name = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
+ len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
buffer);
- if (!name)
+ if (!len)
return sprintf(buffer, "0x%lx", address - symbol_offset);
- if (name != buffer)
- strcpy(buffer, name);
- len = strlen(buffer);
offset -= symbol_offset;
if (add_offset)
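
The exported kallsyms_lookup() keeps its pointer interface on top of the new int-returning internals. A minimal caller under that interface, for reference — show_symbol() is a hypothetical name:

    static void show_symbol(unsigned long addr)
    {
            char buf[KSYM_NAME_LEN];
            unsigned long size, offset;
            char *modname;
            const char *name;

            name = kallsyms_lookup(addr, &size, &offset, &modname, buf);
            if (!name) {
                    pr_info("0x%lx\n", addr);
                    return;
            }
            /* the symbol name is now always copied into buf; name == buf */
            pr_info("%s+%#lx/%#lx %s\n", name, offset, size,
                    modname ? modname : "");
    }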
diff --git a/kernel/kcov.c b/kernel/kcov.c
index c3124f6d5536..f0a69d402066 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -632,6 +632,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
return -EINVAL;
kcov->mode = mode;
t->kcov = kcov;
+ t->kcov_mode = KCOV_MODE_REMOTE;
kcov->t = t;
kcov->remote = true;
kcov->remote_size = remote_arg->area_size;
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 62fb57bb9f16..bf65e0c3c86f 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -321,14 +321,15 @@ void * __weak dereference_module_function_descriptor(struct module *mod,
* For kallsyms to ask for address resolution. NULL means not found. Careful
* not to lock to avoid deadlock on oopses, simply disable preemption.
*/
-const char *module_address_lookup(unsigned long addr,
- unsigned long *size,
- unsigned long *offset,
- char **modname,
- const unsigned char **modbuildid,
- char *namebuf)
+int module_address_lookup(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char **modname,
+ const unsigned char **modbuildid,
+ char *namebuf)
{
- const char *ret = NULL;
+ const char *sym;
+ int ret = 0;
struct module *mod;
preempt_disable();
@@ -344,12 +345,10 @@ const char *module_address_lookup(unsigned long addr,
#endif
}
- ret = find_kallsyms_symbol(mod, addr, size, offset);
- }
- /* Make a copy in here where it's safe */
- if (ret) {
- strscpy(namebuf, ret, KSYM_NAME_LEN);
- ret = namebuf;
+ sym = find_kallsyms_symbol(mod, addr, size, offset);
+
+ if (sym)
+ ret = strscpy(namebuf, sym, KSYM_NAME_LEN);
}
preempt_enable();
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index dc48fecfa1dc..25f3cf679b35 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -218,6 +218,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
*/
do {
clear_thread_flag(TIF_SIGPENDING);
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
rc = kernel_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 040fe7d1eda2..39a2b61c7232 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y = printk.o conopt.o
+obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
diff --git a/kernel/printk/conopt.c b/kernel/printk/conopt.c
deleted file mode 100644
index 9d507bac3657..000000000000
--- a/kernel/printk/conopt.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Kernel command line console options for hardware based addressing
- *
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
- * Author: Tony Lindgren <[email protected]>
- */
-
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include <asm/errno.h>
-
-#include "console_cmdline.h"
-
-/*
- * Allow longer DEVNAME:0.0 style console naming such as abcd0000.serial:0.0
- * in addition to the legacy ttyS0 style naming.
- */
-#define CONSOLE_NAME_MAX 32
-
-#define CONSOLE_OPT_MAX 16
-#define CONSOLE_BRL_OPT_MAX 16
-
-struct console_option {
- char name[CONSOLE_NAME_MAX];
- char opt[CONSOLE_OPT_MAX];
- char brl_opt[CONSOLE_BRL_OPT_MAX];
- u8 has_brl_opt:1;
-};
-
-/* Updated only at console_setup() time, no locking needed */
-static struct console_option conopt[MAX_CMDLINECONSOLES];
-
-/**
- * console_opt_save - Saves kernel command line console option for driver use
- * @str: Kernel command line console name and option
- * @brl_opt: Braille console options
- *
- * Saves a kernel command line console option for driver subsystems to use for
- * adding a preferred console during init. Called from console_setup() only.
- *
- * Return: 0 on success, negative error code on failure.
- */
-int __init console_opt_save(const char *str, const char *brl_opt)
-{
- struct console_option *con;
- size_t namelen, optlen;
- const char *opt;
- int i;
-
- namelen = strcspn(str, ",");
- if (namelen == 0 || namelen >= CONSOLE_NAME_MAX)
- return -EINVAL;
-
- opt = str + namelen;
- if (*opt == ',')
- opt++;
-
- optlen = strlen(opt);
- if (optlen >= CONSOLE_OPT_MAX)
- return -EINVAL;
-
- for (i = 0; i < MAX_CMDLINECONSOLES; i++) {
- con = &conopt[i];
-
- if (con->name[0]) {
- if (!strncmp(str, con->name, namelen))
- return 0;
- continue;
- }
-
- /*
- * The name isn't terminated, only opt is. Empty opt is fine,
- * but brl_opt can be either empty or NULL. For more info, see
- * _braille_console_setup().
- */
- strscpy(con->name, str, namelen + 1);
- strscpy(con->opt, opt, CONSOLE_OPT_MAX);
- if (brl_opt) {
- strscpy(con->brl_opt, brl_opt, CONSOLE_BRL_OPT_MAX);
- con->has_brl_opt = 1;
- }
-
- return 0;
- }
-
- return -ENOMEM;
-}
-
-static struct console_option *console_opt_find(const char *name)
-{
- struct console_option *con;
- int i;
-
- for (i = 0; i < MAX_CMDLINECONSOLES; i++) {
- con = &conopt[i];
- if (!strcmp(name, con->name))
- return con;
- }
-
- return NULL;
-}
-
-/**
- * add_preferred_console_match - Adds a preferred console if a match is found
- * @match: Expected console on kernel command line, such as console=DEVNAME:0.0
- * @name: Name of the console character device to add such as ttyS
- * @idx: Index for the console
- *
- * Allows driver subsystems to add a console after translating the command
- * line name to the character device name used for the console. Options are
- * added automatically based on the kernel command line. Duplicate preferred
- * consoles are ignored by __add_preferred_console().
- *
- * Return: 0 on success, negative error code on failure.
- */
-int add_preferred_console_match(const char *match, const char *name,
- const short idx)
-{
- struct console_option *con;
- char *brl_opt = NULL;
-
- if (!match || !strlen(match) || !name || !strlen(name) ||
- idx < 0)
- return -EINVAL;
-
- con = console_opt_find(match);
- if (!con)
- return -ENOENT;
-
- /*
- * See __add_preferred_console(). It checks for NULL brl_options to set
- * the preferred_console flag. Empty brl_opt instead of NULL leads into
- * the preferred_console flag not set, and CON_CONSDEV not being set,
- * and the boot console won't get disabled at the end of console_setup().
- */
- if (con->has_brl_opt)
- brl_opt = con->brl_opt;
-
- console_opt_add_preferred_console(name, idx, con->opt, brl_opt);
-
- return 0;
-}
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index a125e0235589..3ca74ad391d6 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -2,12 +2,6 @@
#ifndef _CONSOLE_CMDLINE_H
#define _CONSOLE_CMDLINE_H
-#define MAX_CMDLINECONSOLES 8
-
-int console_opt_save(const char *str, const char *brl_opt);
-int console_opt_add_preferred_console(const char *name, const short idx,
- char *options, char *brl_options);
-
struct console_cmdline
{
char name[16]; /* Name of the driver */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 420fd310129d..dddb15f48d59 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -383,6 +383,9 @@ static int console_locked;
/*
* Array of consoles built from command line options (console=)
*/
+
+#define MAX_CMDLINECONSOLES 8
+
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
@@ -2500,17 +2503,6 @@ static int __init console_setup(char *str)
if (_braille_console_setup(&str, &brl_options))
return 1;
- /* Save the console for driver subsystem use */
- if (console_opt_save(str, brl_options))
- return 1;
-
- /* Flag register_console() to not call try_enable_default_console() */
- console_set_on_cmdline = 1;
-
- /* Don't attempt to parse a DEVNAME:0.0 style console */
- if (strchr(str, ':'))
- return 1;
-
/*
* Decode str into name, index, options.
*/
@@ -2541,13 +2533,6 @@ static int __init console_setup(char *str)
}
__setup("console=", console_setup);
-/* Only called from add_preferred_console_match() */
-int console_opt_add_preferred_console(const char *name, const short idx,
- char *options, char *brl_options)
-{
- return __add_preferred_console(name, idx, options, brl_options, true);
-}
-
/**
* add_preferred_console - add a device to the list of preferred consoles.
* @name: device name
@@ -3522,7 +3507,7 @@ void register_console(struct console *newcon)
* Note that a console with tty binding will have CON_CONSDEV
* flag set and will be first in the list.
*/
- if (preferred_console < 0 && !console_set_on_cmdline) {
+ if (preferred_console < 0) {
if (hlist_empty(&console_list) || !console_first()->device ||
console_first()->flags & CON_BOOT) {
try_enable_default_console(newcon);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bcf2c4cc0522..59ce0841eb1f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->prev_irq_time += irq_delta;
delta -= irq_delta;
- psi_account_irqtime(rq->curr, irq_delta);
delayacct_irq(rq->curr, irq_delta);
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
@@ -5665,7 +5664,7 @@ void sched_tick(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
- struct task_struct *curr = rq->curr;
+ struct task_struct *curr;
struct rq_flags rf;
unsigned long hw_pressure;
u64 resched_latency;
@@ -5677,6 +5676,9 @@ void sched_tick(void)
rq_lock(rq, &rf);
+ curr = rq->curr;
+ psi_account_irqtime(rq, curr, NULL);
+
update_rq_clock(rq);
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
@@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
++*switch_count;
migrate_disable_switch(rq, prev);
+ psi_account_irqtime(rq, prev, next);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index c75d1307d86d..9bedd148f007 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* The replenish timer needs to be canceled. No
* problem if it fires concurrently: boosted threads
* are ignored in dl_task_timer().
+ *
+ * If the timer callback was running (hrtimer_try_to_cancel == -1),
+ * it will eventually call put_task_struct().
*/
- hrtimer_try_to_cancel(&p->dl.dl_timer);
+ if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
+ !dl_server(&p->dl))
+ put_task_struct(p);
p->dl.dl_throttled = 0;
}
} else if (!dl_prio(p->normal_prio)) {
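
The refcount rule being applied above, restated as a hedged sketch — cancel_replenish_timer() is an illustrative wrapper, the helpers it calls are the real ones:

    static void cancel_replenish_timer(struct task_struct *p)
    {
            /*
             * hrtimer_try_to_cancel() returns 1 when a queued timer was
             * removed, 0 when it was not queued, and -1 when the callback
             * is currently running; in the -1 case dl_task_timer() itself
             * calls put_task_struct(), so only the "1" case may drop the
             * timer's task reference here.
             */
            if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
                !dl_server(&p->dl))
                    put_task_struct(p);
    }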
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8a5b1ae0aa55..24dda708b699 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env)
break;
env->loop++;
- /*
- * We've more or less seen every task there is, call it quits
- * unless we haven't found any movable task yet.
- */
- if (env->loop > env->loop_max &&
- !(env->flags & LBF_ALL_PINNED))
+ /* We've more or less seen every task there is, call it quits */
+ if (env->loop > env->loop_max)
break;
/* take a breather every nr_migrate tasks */
@@ -11393,9 +11389,7 @@ more_balance:
if (env.flags & LBF_NEED_BREAK) {
env.flags &= ~LBF_NEED_BREAK;
- /* Stop if we tried all running tasks */
- if (env.loop < busiest->nr_running)
- goto more_balance;
+ goto more_balance;
}
/*
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7b4aa5809c0f..507d7b8d79af 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
enum psi_states s;
u32 state_mask;
+ lockdep_assert_rq_held(cpu_rq(cpu));
groupc = per_cpu_ptr(group->pcpu, cpu);
/*
@@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
}
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-void psi_account_irqtime(struct task_struct *task, u32 delta)
+void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
{
- int cpu = task_cpu(task);
+ int cpu = task_cpu(curr);
struct psi_group *group;
struct psi_group_cpu *groupc;
- u64 now;
+ u64 now, irq;
+ s64 delta;
if (static_branch_likely(&psi_disabled))
return;
- if (!task->pid)
+ if (!curr->pid)
+ return;
+
+ lockdep_assert_rq_held(rq);
+ group = task_psi_group(curr);
+ if (prev && task_psi_group(prev) == group)
return;
now = cpu_clock(cpu);
+ irq = irq_time_read(cpu);
+ delta = (s64)(irq - rq->psi_irq_time);
+ if (delta < 0)
+ return;
+ rq->psi_irq_time = irq;
- group = task_psi_group(task);
do {
if (!group->enabled)
continue;
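
The new accounting follows the usual cumulative-counter pattern: snapshot, compute a bounded delta, advance the snapshot. A hedged restatement — account_irq_delta() is illustrative, irq_time_read() is the real accessor:

    static void account_irq_delta(struct rq *rq, int cpu)
    {
            u64 irq = irq_time_read(cpu);
            s64 delta = (s64)(irq - rq->psi_irq_time);

            /* a negative delta means the snapshot already covers this
             * interval; skip rather than account the time twice */
            if (delta < 0)
                    return;
            rq->psi_irq_time = irq;
            /* ... feed delta into the PSI group state ... */
    }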
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a831af102070..ef20c61004eb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1126,6 +1126,7 @@ struct rq {
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
+ u64 psi_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
u64 prev_steal_time;
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 38f3698f5e5b..b02dfc322951 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se)
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
bool sleep);
-void psi_account_irqtime(struct task_struct *task, u32 delta);
-
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
+#else
+static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
+ struct task_struct *prev) {}
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
/*
* PSI tracks state that persists across sleeps, such as iowaits and
* memory stalls. As a result, it has to distinguish between sleeps,
@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev,
struct task_struct *next,
bool sleep) {}
-static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
+static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
+ struct task_struct *prev) {}
#endif /* CONFIG_PSI */
#ifdef CONFIG_SCHED_INFO
diff --git a/kernel/signal.c b/kernel/signal.c
index 1f9dd41c04be..60c737e423a1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2600,6 +2600,14 @@ static void do_freezer_trap(void)
spin_unlock_irq(&current->sighand->siglock);
cgroup_enter_frozen();
schedule();
+
+ /*
+ * We could've been woken by task_work; run it to clear
+ * TIF_NOTIFY_SIGNAL. The caller will retry if necessary.
+ */
+ clear_notify_signal();
+ if (unlikely(task_work_pending(current)))
+ task_work_run();
}
static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d7eee421d4bc..b696b85ac63e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,8 +46,8 @@ COND_SYSCALL(io_getevents_time32);
COND_SYSCALL(io_getevents);
COND_SYSCALL(io_pgetevents_time32);
COND_SYSCALL(io_pgetevents);
-COND_SYSCALL_COMPAT(io_pgetevents_time32);
COND_SYSCALL_COMPAT(io_pgetevents);
+COND_SYSCALL_COMPAT(io_pgetevents_time64);
COND_SYSCALL(io_uring_setup);
COND_SYSCALL(io_uring_enter);
COND_SYSCALL(io_uring_register);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 492c14aac642..b8ee320208d4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1285,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;
+ if (WARN_ON_ONCE(!timer->function))
+ return;
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
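
The new WARN_ON_ONCE() catches timers armed before their callback is set. A minimal correct setup for contrast — my_timer, my_timer_fn and arm_my_timer are illustrative names:

    static struct hrtimer my_timer;

    static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
    {
            return HRTIMER_NORESTART;
    }

    static void arm_my_timer(void)
    {
            hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            my_timer.function = my_timer_fn;  /* must precede hrtimer_start() */
            hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
    }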
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 166ad5444eea..721c3b221048 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST
config SYNTH_EVENT_GEN_TEST
tristate "Test module for in-kernel synthetic event generation"
- depends on SYNTH_EVENTS
+ depends on SYNTH_EVENTS && m
help
This option creates a test module to check the base
functionality of in-kernel synthetic event definition and
@@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST
config KPROBE_EVENT_GEN_TEST
tristate "Test module for in-kernel kprobe event generation"
- depends on KPROBE_EVENTS
+ depends on KPROBE_EVENTS && m
help
This option creates a test module to check the base
functionality of in-kernel kprobe event definition.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 65208d3b5ed9..eacab4020508 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6969,7 +6969,7 @@ allocate_ftrace_mod_map(struct module *mod,
return mod_map;
}
-static const char *
+static int
ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
@@ -6990,21 +6990,18 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
*size = found_func->size;
if (off)
*off = addr - found_func->ip;
- if (sym)
- strscpy(sym, found_func->name, KSYM_NAME_LEN);
-
- return found_func->name;
+ return strscpy(sym, found_func->name, KSYM_NAME_LEN);
}
- return NULL;
+ return 0;
}
-const char *
+int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
struct ftrace_mod_map *mod_map;
- const char *ret = NULL;
+ int ret = 0;
/* mod_map is freed via call_rcu() */
preempt_disable();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 003474c9a77d..3fbaecfc88c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,6 +125,7 @@ enum wq_internal_consts {
HIGHPRI_NICE_LEVEL = MIN_NICE,
WQ_NAME_LEN = 32,
+ WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
};
/*
@@ -2742,6 +2743,26 @@ static void worker_detach_from_pool(struct worker *worker)
complete(detach_completion);
}
+static int format_worker_id(char *buf, size_t size, struct worker *worker,
+ struct worker_pool *pool)
+{
+ if (worker->rescue_wq)
+ return scnprintf(buf, size, "kworker/R-%s",
+ worker->rescue_wq->name);
+
+ if (pool) {
+ if (pool->cpu >= 0)
+ return scnprintf(buf, size, "kworker/%d:%d%s",
+ pool->cpu, worker->id,
+ pool->attrs->nice < 0 ? "H" : "");
+ else
+ return scnprintf(buf, size, "kworker/u%d:%d",
+ pool->id, worker->id);
+ } else {
+ return scnprintf(buf, size, "kworker/dying");
+ }
+}
+
/**
* create_worker - create a new workqueue worker
* @pool: pool the new worker will belong to
@@ -2758,7 +2779,6 @@ static struct worker *create_worker(struct worker_pool *pool)
{
struct worker *worker;
int id;
- char id_buf[23];
/* ID is needed to determine kthread name */
id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
@@ -2777,17 +2797,14 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->id = id;
if (!(pool->flags & POOL_BH)) {
- if (pool->cpu >= 0)
- snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
- pool->attrs->nice < 0 ? "H" : "");
- else
- snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
+ char id_buf[WORKER_ID_LEN];
+ format_worker_id(id_buf, sizeof(id_buf), worker, pool);
worker->task = kthread_create_on_node(worker_thread, worker,
- pool->node, "kworker/%s", id_buf);
+ pool->node, "%s", id_buf);
if (IS_ERR(worker->task)) {
if (PTR_ERR(worker->task) == -EINTR) {
- pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
+ pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n",
id_buf);
} else {
pr_err_once("workqueue: Failed to create a worker thread: %pe",
@@ -3350,7 +3367,6 @@ woke_up:
raw_spin_unlock_irq(&pool->lock);
set_pf_worker(false);
- set_task_comm(worker->task, "kworker/dying");
ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
WARN_ON_ONCE(!list_empty(&worker->entry));
@@ -5542,6 +5558,7 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
static int init_rescuer(struct workqueue_struct *wq)
{
struct worker *rescuer;
+ char id_buf[WORKER_ID_LEN];
int ret;
if (!(wq->flags & WQ_MEM_RECLAIM))
@@ -5555,7 +5572,9 @@ static int init_rescuer(struct workqueue_struct *wq)
}
rescuer->rescue_wq = wq;
- rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", wq->name);
+ format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL);
+
+ rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf);
if (IS_ERR(rescuer->task)) {
ret = PTR_ERR(rescuer->task);
pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
@@ -6384,19 +6403,15 @@ void show_freezable_workqueues(void)
/* used to show worker information through /proc/PID/{comm,stat,status} */
void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
{
- int off;
-
- /* always show the actual comm */
- off = strscpy(buf, task->comm, size);
- if (off < 0)
- return;
-
/* stabilize PF_WQ_WORKER and worker pool association */
mutex_lock(&wq_pool_attach_mutex);
if (task->flags & PF_WQ_WORKER) {
struct worker *worker = kthread_data(task);
struct worker_pool *pool = worker->pool;
+ int off;
+
+ off = format_worker_id(buf, size, worker, pool);
if (pool) {
raw_spin_lock_irq(&pool->lock);
@@ -6415,6 +6430,8 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
}
raw_spin_unlock_irq(&pool->lock);
}
+ } else {
+ strscpy(buf, task->comm, size);
}
mutex_unlock(&wq_pool_attach_mutex);
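
With this change every worker thread name funnels through format_worker_id(). The resulting formats, summarized (not verbatim from the patch description):

    /*
     * Name formats produced by format_worker_id():
     *
     *   rescuer:              kworker/R-<workqueue name>
     *   bound pool, nice>=0:  kworker/<cpu>:<id>
     *   bound pool, nice<0:   kworker/<cpu>:<id>H
     *   unbound pool:         kworker/u<pool id>:<id>
     *   detached/dying:       kworker/dying
     *
     * WORKER_ID_LEN = 10 + WQ_NAME_LEN because "kworker/R-" is 10 bytes.
     */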
diff --git a/lib/Kconfig b/lib/Kconfig
index d33a268bc256..b0a76dff5c18 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -539,13 +539,7 @@ config CPUMASK_OFFSTACK
stack overflow.
config FORCE_NR_CPUS
- bool "Set number of CPUs at compile time"
- depends on SMP && EXPERT && !COMPILE_TEST
- help
- Say Yes if you have NR_CPUS set to an actual number of possible
- CPUs in your system, not to a default value. This forces the core
- code to rely on compile-time value and optimize kernel routines
- better.
+ def_bool !SMP
config CPU_RMAP
bool
diff --git a/lib/Makefile b/lib/Makefile
index 3b1769045651..30337431d10e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -426,3 +426,7 @@ $(obj)/$(TEST_FORTIFY_LOG): $(addprefix $(obj)/, $(TEST_FORTIFY_LOGS)) FORCE
ifeq ($(CONFIG_FORTIFY_SOURCE),y)
$(obj)/string.o: $(obj)/$(TEST_FORTIFY_LOG)
endif
+
+# Some architectures define __NO_FORTIFY if __SANITIZE_ADDRESS__ is undefined.
+# Pass CFLAGS_KASAN to avoid warnings.
+$(foreach x, $(patsubst %.log,%.o,$(TEST_FORTIFY_LOGS)), $(eval KASAN_SANITIZE_$(x) := y))
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 11ed973ac359..c347b8b72d78 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -227,6 +227,7 @@ struct page_ext_operations page_alloc_tagging_ops = {
};
EXPORT_SYMBOL(page_alloc_tagging_ops);
+#ifdef CONFIG_SYSCTL
static struct ctl_table memory_allocation_profiling_sysctls[] = {
{
.procname = "mem_profiling",
@@ -241,6 +242,17 @@ static struct ctl_table memory_allocation_profiling_sysctls[] = {
{ }
};
+static void __init sysctl_init(void)
+{
+ if (!mem_profiling_support)
+ memory_allocation_profiling_sysctls[0].mode = 0444;
+
+ register_sysctl_init("vm", memory_allocation_profiling_sysctls);
+}
+#else /* CONFIG_SYSCTL */
+static inline void sysctl_init(void) {}
+#endif /* CONFIG_SYSCTL */
+
static int __init alloc_tag_init(void)
{
const struct codetag_type_desc desc = {
@@ -253,9 +265,7 @@ static int __init alloc_tag_init(void)
if (IS_ERR(alloc_tag_cttype))
return PTR_ERR(alloc_tag_cttype);
- if (!mem_profiling_support)
- memory_allocation_profiling_sysctls[0].mode = 0444;
- register_sysctl_init("vm", memory_allocation_profiling_sysctls);
+ sysctl_init();
procfs_init();
return 0;
diff --git a/lib/build_OID_registry b/lib/build_OID_registry
index 56d8bafeb848..8267e8d71338 100755
--- a/lib/build_OID_registry
+++ b/lib/build_OID_registry
@@ -38,7 +38,9 @@ close IN_FILE || die;
#
open C_FILE, ">$ARGV[1]" or die;
print C_FILE "/*\n";
-print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
+my $scriptname = $0;
+$scriptname =~ s#^\Q$abs_srctree/\E##;
+print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";
print C_FILE " */\n";
#
diff --git a/lib/closure.c b/lib/closure.c
index 07409e9e35a5..116afae2eed9 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -13,14 +13,25 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
-static inline void closure_put_after_sub(struct closure *cl, int flags)
+static inline void closure_put_after_sub_checks(int flags)
{
int r = flags & CLOSURE_REMAINING_MASK;
- BUG_ON(flags & CLOSURE_GUARD_MASK);
- BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
+ if (WARN(flags & CLOSURE_GUARD_MASK,
+ "closure has guard bits set: %x (%u)",
+ flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
+ r &= ~CLOSURE_GUARD_MASK;
+
+ WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
+ "closure ref hit 0 with incorrect flags set: %x (%u)",
+ flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
+}
+
+static inline void closure_put_after_sub(struct closure *cl, int flags)
+{
+ closure_put_after_sub_checks(flags);
- if (!r) {
+ if (!(flags & CLOSURE_REMAINING_MASK)) {
smp_acquire__after_ctrl_dep();
cl->closure_get_happened = false;
@@ -139,6 +150,41 @@ void __sched __closure_sync(struct closure *cl)
}
EXPORT_SYMBOL(__closure_sync);
+/*
+ * closure_return_sync - finish running a closure, synchronously (i.e. waiting
+ * for outstanding get()s to finish) and returning once closure refcount is 0.
+ *
+ * Unlike closure_sync() this doesn't reinit the ref to 1; subsequent
+ * closure_get_not_zero() calls will fail.
+ */
+void __sched closure_return_sync(struct closure *cl)
+{
+ struct closure_syncer s = { .task = current };
+
+ cl->s = &s;
+ set_closure_fn(cl, closure_sync_fn, NULL);
+
+ unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
+ &cl->remaining);
+
+ closure_put_after_sub_checks(flags);
+
+ if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (s.done)
+ break;
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+ }
+
+ if (cl->parent)
+ closure_put(cl->parent);
+}
+EXPORT_SYMBOL(closure_return_sync);
+
int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
struct closure_syncer s = { .task = current };
@@ -198,6 +244,9 @@ void closure_debug_destroy(struct closure *cl)
{
unsigned long flags;
+ if (cl->magic == CLOSURE_MAGIC_STACK)
+ return;
+
BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
cl->magic = CLOSURE_MAGIC_DEAD;
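
A hedged usage sketch for the new helper — struct my_op and my_op_done() are hypothetical, closure_return_sync() and kfree() are the real symbols:

    struct my_op {
            struct closure  cl;
            /* ... operation state ... */
    };

    static void my_op_done(struct my_op *op)
    {
            /*
             * Waits for outstanding closure_get()s and leaves the ref at
             * 0, so closure_get_not_zero() fails from here on and the
             * containing object can be freed safely.
             */
            closure_return_sync(&op->cl);
            kfree(op);
    }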
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index fb12a9bacd2f..7cea91e193a8 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -78,16 +78,17 @@ static bool obj_freeing;
/* The number of objs on the global free list */
static int obj_nr_tofree;
-static int debug_objects_maxchain __read_mostly;
-static int __maybe_unused debug_objects_maxchecked __read_mostly;
-static int debug_objects_fixups __read_mostly;
-static int debug_objects_warnings __read_mostly;
-static int debug_objects_enabled __read_mostly
- = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int debug_objects_pool_size __read_mostly
- = ODEBUG_POOL_SIZE;
-static int debug_objects_pool_min_level __read_mostly
- = ODEBUG_POOL_MIN_LEVEL;
+static int __data_racy debug_objects_maxchain __read_mostly;
+static int __data_racy __maybe_unused debug_objects_maxchecked __read_mostly;
+static int __data_racy debug_objects_fixups __read_mostly;
+static int __data_racy debug_objects_warnings __read_mostly;
+static int __data_racy debug_objects_enabled __read_mostly
+ = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
+static int __data_racy debug_objects_pool_size __read_mostly
+ = ODEBUG_POOL_SIZE;
+static int __data_racy debug_objects_pool_min_level __read_mostly
+ = ODEBUG_POOL_MIN_LEVEL;
+
static const struct debug_obj_descr *descr_test __read_mostly;
static struct kmem_cache *obj_cache __ro_after_init;
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index f9cc467334ce..e17d520f532c 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -374,7 +374,7 @@ static const char * const test_strs[] = {
for (i = 0; i < ARRAY_SIZE(test_strs); i++) { \
len = strlen(test_strs[i]); \
KUNIT_EXPECT_EQ(test, __builtin_constant_p(len), 0); \
- checker(len, kmemdup_array(test_strs[i], len, 1, gfp), \
+ checker(len, kmemdup_array(test_strs[i], 1, len, gfp), \
kfree(p)); \
checker(len, kmemdup(test_strs[i], len, gfp), \
kfree(p)); \
diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c
index 4ef31b0bb74d..d305b0c054bb 100644
--- a/lib/overflow_kunit.c
+++ b/lib/overflow_kunit.c
@@ -1178,14 +1178,28 @@ struct foo {
s16 array[] __counted_by(counter);
};
+struct bar {
+ int a;
+ u32 counter;
+ s16 array[];
+};
+
static void DEFINE_FLEX_test(struct kunit *test)
{
- DEFINE_RAW_FLEX(struct foo, two, array, 2);
+ /* Using _RAW_ on a __counted_by struct will initialize "counter" to zero */
+ DEFINE_RAW_FLEX(struct foo, two_but_zero, array, 2);
+#if __has_attribute(__counted_by__)
+ int expected_raw_size = sizeof(struct foo);
+#else
+ int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16);
+#endif
+ /* Without annotation, it will always be on-stack size. */
+ DEFINE_RAW_FLEX(struct bar, two, array, 2);
DEFINE_FLEX(struct foo, eight, array, counter, 8);
DEFINE_FLEX(struct foo, empty, array, counter, 0);
- KUNIT_EXPECT_EQ(test, __struct_size(two),
- sizeof(struct foo) + sizeof(s16) + sizeof(s16));
+ KUNIT_EXPECT_EQ(test, __struct_size(two_but_zero), expected_raw_size);
+ KUNIT_EXPECT_EQ(test, __struct_size(two), sizeof(struct bar) + 2 * sizeof(s16));
KUNIT_EXPECT_EQ(test, __struct_size(eight), 24);
KUNIT_EXPECT_EQ(test, __struct_size(empty), sizeof(struct foo));
}
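
The distinction being tested, restated as a hedged sketch — struct sample is hypothetical, the DEFINE_*FLEX() macros are real:

    struct sample {
            int  a;
            u32  counter;
            s16  array[] __counted_by(counter);
    };

    static void sample_usage(void)
    {
            /*
             * Both reserve on-stack storage for two elements, but with
             * __counted_by support DEFINE_RAW_FLEX() leaves "counter" at
             * 0, so __struct_size() reports only sizeof(struct sample);
             * DEFINE_FLEX() also sets the counter to the given value.
             */
            DEFINE_RAW_FLEX(struct sample, s, array, 2);
            DEFINE_FLEX(struct sample, s2, array, counter, 2);
    }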
diff --git a/lib/string_helpers_kunit.c b/lib/string_helpers_kunit.c
index f88e39fd68d6..c853046183d2 100644
--- a/lib/string_helpers_kunit.c
+++ b/lib/string_helpers_kunit.c
@@ -625,4 +625,5 @@ static struct kunit_suite string_helpers_test_suite = {
kunit_test_suites(&string_helpers_test_suite);
+MODULE_DESCRIPTION("Test cases for string helpers module");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/string_kunit.c b/lib/string_kunit.c
index 2a812decf14b..c919e3293da6 100644
--- a/lib/string_kunit.c
+++ b/lib/string_kunit.c
@@ -633,4 +633,5 @@ static struct kunit_suite string_test_suite = {
kunit_test_suites(&string_test_suite);
+MODULE_DESCRIPTION("Test cases for string functions");
MODULE_LICENSE("GPL v2");
diff --git a/mm/compaction.c b/mm/compaction.c
index e731d45befc7..739b1bf3d637 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -79,6 +79,13 @@ static inline bool is_via_compact_memory(int order) { return false; }
#define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT)
#endif
+static struct page *mark_allocated_noprof(struct page *page, unsigned int order, gfp_t gfp_flags)
+{
+ post_alloc_hook(page, order, __GFP_MOVABLE);
+ return page;
+}
+#define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__))
+
static void split_map_pages(struct list_head *freepages)
{
unsigned int i, order;
@@ -93,7 +100,7 @@ static void split_map_pages(struct list_head *freepages)
nr_pages = 1 << order;
- post_alloc_hook(page, order, __GFP_MOVABLE);
+ mark_allocated(page, order, __GFP_MOVABLE);
if (order)
split_page(page, order);
@@ -122,7 +129,7 @@ static unsigned long release_free_list(struct list_head *freepages)
* Convert free pages into post allocation pages, so
* that we can free them via __free_page.
*/
- post_alloc_hook(page, order, __GFP_MOVABLE);
+ mark_allocated(page, order, __GFP_MOVABLE);
__free_pages(page, order);
if (pfn > high_pfn)
high_pfn = pfn;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6392f1cc97a3..e66823d6b10b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
* access frequencies are similar. This is for minimizing the monitoring
* overhead under the dynamically changeable access pattern. If a merge was
* unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ *
+ * The total number of regions could exceed the user-defined limit,
+ * max_nr_regions, in some cases. For example, the user can update
+ * max_nr_regions to a number lower than the current number of regions
+ * while DAMON is running. For such cases, repeat merging until the limit
+ * is met, while increasing @threshold up to the possible maximum level.
*/
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
unsigned long sz_limit)
{
struct damon_target *t;
-
- damon_for_each_target(t, c)
- damon_merge_regions_of(t, threshold, sz_limit);
+ unsigned int nr_regions;
+ unsigned int max_thres;
+
+ max_thres = c->attrs.aggr_interval /
+ (c->attrs.sample_interval ? c->attrs.sample_interval : 1);
+ do {
+ nr_regions = 0;
+ damon_for_each_target(t, c) {
+ damon_merge_regions_of(t, threshold, sz_limit);
+ nr_regions += damon_nr_regions(t);
+ }
+ threshold = max(1, threshold * 2);
+ } while (nr_regions > c->attrs.max_nr_regions &&
+ threshold / 2 < max_thres);
}
/*
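
In pseudocode terms the new loop is: merge, count, double the threshold, and repeat while still over the limit and below the ceiling — it terminates because @threshold at least doubles each pass. A hedged restatement, where merge_all_targets() stands in for the damon_for_each_target() walk:

    unsigned int max_thres = attrs->aggr_interval /
            (attrs->sample_interval ? attrs->sample_interval : 1);

    do {
            nr_regions = merge_all_targets(c, threshold, sz_limit);
            threshold = max(1, threshold * 2);
    } while (nr_regions > attrs->max_nr_regions &&
             threshold / 2 < max_thres);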
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index b104a353b532..e4969fb54da3 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -40,22 +40,7 @@
* Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics
* expectations that are being validated here. All future changes in here
* or the documentation need to be in sync.
- *
- * On s390 platform, the lower 4 bits are used to identify given page table
- * entry type. But these bits might affect the ability to clear entries with
- * pxx_clear() because of how dynamic page table folding works on s390. So
- * while loading up the entries do not change the lower 4 bits. It does not
- * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
- * used to mark a pte entry.
*/
-#define S390_SKIP_MASK GENMASK(3, 0)
-#if __BITS_PER_LONG == 64
-#define PPC64_SKIP_MASK GENMASK(62, 62)
-#else
-#define PPC64_SKIP_MASK 0x0
-#endif
-#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
-#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
#define RANDOM_NZVALUE GENMASK(7, 0)
struct pgtable_debug_args {
@@ -511,8 +496,7 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args)
return;
pr_debug("Validating PUD clear\n");
- pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->pudp, pud);
+ WARN_ON(pud_none(pud));
pud_clear(args->pudp);
pud = READ_ONCE(*args->pudp);
WARN_ON(!pud_none(pud));
@@ -548,8 +532,7 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args)
return;
pr_debug("Validating P4D clear\n");
- p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->p4dp, p4d);
+ WARN_ON(p4d_none(p4d));
p4d_clear(args->p4dp);
p4d = READ_ONCE(*args->p4dp);
WARN_ON(!p4d_none(p4d));
@@ -582,8 +565,7 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args)
return;
pr_debug("Validating PGD clear\n");
- pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->pgdp, pgd);
+ WARN_ON(pgd_none(pgd));
pgd_clear(args->pgdp);
pgd = READ_ONCE(*args->pgdp);
WARN_ON(!pgd_none(pgd));
@@ -634,10 +616,8 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args)
if (WARN_ON(!args->ptep))
return;
-#ifndef CONFIG_RISCV
- pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
-#endif
set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+ WARN_ON(pte_none(pte));
flush_dcache_page(page);
barrier();
ptep_clear(args->mm, args->vaddr, args->ptep);
@@ -650,8 +630,7 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args)
pmd_t pmd = READ_ONCE(*args->pmdp);
pr_debug("Validating PMD clear\n");
- pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->pmdp, pmd);
+ WARN_ON(pmd_none(pmd));
pmd_clear(args->pmdp);
pmd = READ_ONCE(*args->pmdp);
WARN_ON(!pmd_none(pmd));
diff --git a/mm/filemap.c b/mm/filemap.c
index 876cc64aadd7..657bcd887fdb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1847,7 +1847,7 @@ repeat:
if (!folio || xa_is_value(folio))
goto out;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto repeat;
if (unlikely(folio != xas_reload(&xas))) {
@@ -2001,7 +2001,7 @@ retry:
if (!folio || xa_is_value(folio))
return folio;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto reset;
if (unlikely(folio != xas_reload(xas))) {
@@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (xa_is_value(folio))
goto update_start;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
@@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
break;
if (xa_is_sibling(folio))
break;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
@@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
- if (vm_flags & VM_HUGEPAGE) {
+ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
ra->size = HPAGE_PMD_NR;
@@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return 0;
- ptep = pte_offset_map(vmf->pmd, vmf->address);
+ ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
+ &vmf->ptl);
if (unlikely(!ptep))
return VM_FAULT_NOPAGE;
@@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
continue;
if (folio_test_locked(folio))
continue;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
continue;
/* Has the page moved or been split? */
if (unlikely(folio != xas_reload(xas)))
@@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, first_index);
struct folio *folio;
+ /* Flush stats (and potentially sleep) outside the RCU read section. */
+ mem_cgroup_flush_stats_ratelimited(NULL);
+
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
int order;
@@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
#endif
- if (workingset_test_recent(shadow, true, &workingset))
+ if (workingset_test_recent(shadow, true, &workingset, false))
cs->nr_recently_evicted += nr_pages;
goto resched;
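
The folio_try_get() conversion keeps the standard RCU lookup dance: take a speculative reference, re-check that the folio is still the one indexed at this position, otherwise drop it and retry. A hedged sketch of the pattern — lookup_stable() is illustrative and callers are assumed to hold rcu_read_lock():

    static struct folio *lookup_stable(struct xa_state *xas)
    {
            struct folio *folio;

    repeat:
            xas_reset(xas);
            folio = xas_load(xas);
            if (!folio || xa_is_value(folio))
                    return NULL;
            if (!folio_try_get(folio))      /* freed under us: retry */
                    goto repeat;
            if (unlikely(folio != xas_reload(xas))) {
                    folio_put(folio);       /* moved or split: retry */
                    goto repeat;
            }
            return folio;
    }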
diff --git a/mm/gup.c b/mm/gup.c
index ca0f5cedce9b..f1d6bc06eb52 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -76,7 +76,7 @@ retry:
folio = page_folio(page);
if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
return NULL;
- if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
+ if (unlikely(!folio_ref_try_add(folio, refs)))
return NULL;
/*
@@ -97,95 +97,6 @@ retry:
return folio;
}
-/**
- * try_grab_folio() - Attempt to get or pin a folio.
- * @page: pointer to page to be grabbed
- * @refs: the value to (effectively) add to the folio's refcount
- * @flags: gup flags: these are the FOLL_* flag values.
- *
- * "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
- *
- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
- * same time. (That's true throughout the get_user_pages*() and
- * pin_user_pages*() APIs.) Cases:
- *
- * FOLL_GET: folio's refcount will be incremented by @refs.
- *
- * FOLL_PIN on large folios: folio's refcount will be incremented by
- * @refs, and its pincount will be incremented by @refs.
- *
- * FOLL_PIN on single-page folios: folio's refcount will be incremented by
- * @refs * GUP_PIN_COUNTING_BIAS.
- *
- * Return: The folio containing @page (with refcount appropriately
- * incremented) for success, or NULL upon failure. If neither FOLL_GET
- * nor FOLL_PIN was set, that's considered failure, and furthermore,
- * a likely bug in the caller, so a warning is also emitted.
- */
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
-{
- struct folio *folio;
-
- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
- return NULL;
-
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
- return NULL;
-
- if (flags & FOLL_GET)
- return try_get_folio(page, refs);
-
- /* FOLL_PIN is set */
-
- /*
- * Don't take a pin on the zero page - it's not going anywhere
- * and it is used in a *lot* of places.
- */
- if (is_zero_page(page))
- return page_folio(page);
-
- folio = try_get_folio(page, refs);
- if (!folio)
- return NULL;
-
- /*
- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
- * right zone, so fail and let the caller fall back to the slow
- * path.
- */
- if (unlikely((flags & FOLL_LONGTERM) &&
- !folio_is_longterm_pinnable(folio))) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
- return NULL;
- }
-
- /*
- * When pinning a large folio, use an exact count to track it.
- *
- * However, be sure to *also* increment the normal folio
- * refcount field at least once, so that the folio really
- * is pinned. That's why the refcount from the earlier
- * try_get_folio() is left intact.
- */
- if (folio_test_large(folio))
- atomic_add(refs, &folio->_pincount);
- else
- folio_ref_add(folio,
- refs * (GUP_PIN_COUNTING_BIAS - 1));
- /*
- * Adjust the pincount before re-checking the PTE for changes.
- * This is essentially a smp_mb() and is paired with a memory
- * barrier in folio_try_share_anon_rmap_*().
- */
- smp_mb__after_atomic();
-
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
-
- return folio;
-}
-
static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
{
if (flags & FOLL_PIN) {
@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
}
/**
- * try_grab_page() - elevate a page's refcount by a flag-dependent amount
- * @page: pointer to page to be grabbed
- * @flags: gup flags: these are the FOLL_* flag values.
+ * try_grab_folio() - add a folio's refcount by a flag-dependent amount
+ * @folio: pointer to folio to be grabbed
+ * @refs: the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values
*
* This might not do anything at all, depending on the flags argument.
*
* "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
*
* Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases: please see the try_grab_folio() documentation, with
- * "refs=1".
+ * time.
*
* Return: 0 for success, or if no action was required (if neither FOLL_PIN
* nor FOLL_GET was set, nothing is done). A negative error code for failure:
*
- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not
+ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not
* be grabbed.
+ *
+ * It is called when we have a stable reference for the folio, typically in
+ * the GUP slow path.
*/
-int __must_check try_grab_page(struct page *page, unsigned int flags)
+int __must_check try_grab_folio(struct folio *folio, int refs,
+ unsigned int flags)
{
- struct folio *folio = page_folio(page);
-
if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
return -ENOMEM;
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))
return -EREMOTEIO;
if (flags & FOLL_GET)
- folio_ref_inc(folio);
+ folio_ref_add(folio, refs);
else if (flags & FOLL_PIN) {
/*
* Don't take a pin on the zero page - it's not going anywhere
* and it is used in a *lot* of places.
*/
- if (is_zero_page(page))
+ if (is_zero_folio(folio))
return 0;
/*
- * Similar to try_grab_folio(): be sure to *also*
- * increment the normal page refcount field at least once,
+ * Increment the normal page refcount field at least once,
* so that the page really is pinned.
*/
if (folio_test_large(folio)) {
- folio_ref_add(folio, 1);
- atomic_add(1, &folio->_pincount);
+ folio_ref_add(folio, refs);
+ atomic_add(refs, &folio->_pincount);
} else {
- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
}
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
}
return 0;
@@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz,
return nr;
}
+
+/**
+ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
+ * @page: pointer to page to be grabbed
+ * @refs: the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values.
+ *
+ * "grab" names in this file mean, "look at flags to decide whether to use
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
+ *
+ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
+ * same time. (That's true throughout the get_user_pages*() and
+ * pin_user_pages*() APIs.) Cases:
+ *
+ * FOLL_GET: folio's refcount will be incremented by @refs.
+ *
+ * FOLL_PIN on large folios: folio's refcount will be incremented by
+ * @refs, and its pincount will be incremented by @refs.
+ *
+ * FOLL_PIN on single-page folios: folio's refcount will be incremented by
+ * @refs * GUP_PIN_COUNTING_BIAS.
+ *
+ * Return: The folio containing @page (with refcount appropriately
+ * incremented) for success, or NULL upon failure. If neither FOLL_GET
+ * nor FOLL_PIN was set, that's considered failure, and furthermore,
+ * a likely bug in the caller, so a warning is also emitted.
+ *
+ * It elevates the folio refcount with an add-unless-zero operation and
+ * must only be called in the GUP fast path.
+ */
+static struct folio *try_grab_folio_fast(struct page *page, int refs,
+ unsigned int flags)
+{
+ struct folio *folio;
+
+ /* Warn if this is not called from the GUP fast path */
+ VM_WARN_ON_ONCE(!irqs_disabled());
+
+ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
+ return NULL;
+
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+ return NULL;
+
+ if (flags & FOLL_GET)
+ return try_get_folio(page, refs);
+
+ /* FOLL_PIN is set */
+
+ /*
+ * Don't take a pin on the zero page - it's not going anywhere
+ * and it is used in a *lot* of places.
+ */
+ if (is_zero_page(page))
+ return page_folio(page);
+
+ folio = try_get_folio(page, refs);
+ if (!folio)
+ return NULL;
+
+ /*
+ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+ * right zone, so fail and let the caller fall back to the slow
+ * path.
+ */
+ if (unlikely((flags & FOLL_LONGTERM) &&
+ !folio_is_longterm_pinnable(folio))) {
+ if (!put_devmap_managed_folio_refs(folio, refs))
+ folio_put_refs(folio, refs);
+ return NULL;
+ }
+
+ /*
+ * When pinning a large folio, use an exact count to track it.
+ *
+ * However, be sure to *also* increment the normal folio
+ * refcount field at least once, so that the folio really
+ * is pinned. That's why the refcount from the earlier
+ * try_get_folio() is left intact.
+ */
+ if (folio_test_large(folio))
+ atomic_add(refs, &folio->_pincount);
+ else
+ folio_ref_add(folio,
+ refs * (GUP_PIN_COUNTING_BIAS - 1));
+ /*
+ * Adjust the pincount before re-checking the PTE for changes.
+ * This is essentially a smp_mb() and is paired with a memory
+ * barrier in folio_try_share_anon_rmap_*().
+ */
+ smp_mb__after_atomic();
+
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+
+ return folio;
+}
#endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
#ifdef CONFIG_ARCH_HAS_HUGEPD
@@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
*/
static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,
unsigned long addr, unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
unsigned long pte_end;
struct page *page;
@@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
page = pte_page(pte);
refs = record_subpages(page, sz, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
- if (!folio)
- return 0;
+ if (fast) {
+ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+ } else {
+ folio = page_folio(page);
+ if (try_grab_folio(folio, refs, flags))
+ return 0;
+ }
if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
gup_put_folio(folio, refs, flags);
@@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
unsigned long addr, unsigned int pdshift,
unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
pte_t *ptep;
unsigned long sz = 1UL << hugepd_shift(hugepd);
@@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
- ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);
+ ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr,
+ fast);
if (ret != 1)
return ret;
} while (ptep++, addr = next, addr != end);
@@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
ptep = hugepte_offset(hugepd, addr, pdshift);
ptl = huge_pte_lock(h, vma->vm_mm, ptep);
ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,
- flags, &page, &nr);
+ flags, &page, &nr, false);
spin_unlock(ptl);
if (ret == 1) {
@@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
unsigned long addr, unsigned int pdshift,
unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
return 0;
}
@@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
gup_must_unshare(vma, flags, page))
return ERR_PTR(-EMLINK);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
else
@@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
return ERR_PTR(ret);
@@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
- ret = try_grab_page(page, flags);
+ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (unlikely(ret)) {
page = ERR_PTR(ret);
goto out;
@@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
goto unmap;
*page = pte_page(entry);
}
- ret = try_grab_page(*page, gup_flags);
+ ret = try_grab_folio(page_folio(*page), 1, gup_flags);
if (unlikely(ret))
goto unmap;
out:
@@ -1636,20 +1651,19 @@ next_page:
* pages.
*/
if (page_increm > 1) {
- struct folio *folio;
+ struct folio *folio = page_folio(page);
/*
* Since we already hold refcount on the
* large folio, this should never fail.
*/
- folio = try_grab_folio(page, page_increm - 1,
- foll_flags);
- if (WARN_ON_ONCE(!folio)) {
+ if (try_grab_folio(folio, page_increm - 1,
+ foll_flags)) {
/*
* Release the 1st page ref if the
* folio is problematic, fail hard.
*/
- gup_put_folio(page_folio(page), 1,
+ gup_put_folio(folio, 1,
foll_flags);
ret = -EFAULT;
goto out;
@@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
#ifdef CONFIG_HAVE_GUP_FAST
-
/*
* Used in the GUP-fast path to determine whether GUP is permitted to work on
* a specific folio.
@@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- folio = try_grab_folio(page, 1, flags);
+ folio = try_grab_folio_fast(page, 1, flags);
if (!folio)
goto pte_unmap;
@@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
break;
}
- folio = try_grab_folio(page, 1, flags);
+ folio = try_grab_folio_fast(page, 1, flags);
if (!folio) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
@@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
page = pmd_page(orig);
refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
page = pud_page(orig);
refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr,
page = pgd_page(orig);
refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
* pmd format and THP pmd format
*/
if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,
- PMD_SHIFT, next, flags, pages, nr) != 1)
+ PMD_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
pages, nr))
@@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
return 0;
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,
- PUD_SHIFT, next, flags, pages, nr) != 1)
+ PUD_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
pages, nr))
@@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
BUILD_BUG_ON(p4d_leaf(p4d));
if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,
- P4D_SHIFT, next, flags, pages, nr) != 1)
+ P4D_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
pages, nr))
@@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
return;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, flags, pages, nr) != 1)
+ PGDIR_SHIFT, next, flags, pages, nr,
+ true) != 1)
return;
} else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
pages, nr))
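Taken together with the mm/internal.h hunk further down, these gup changes make the reference helpers folio-first: try_grab_page() is removed, slow-path callers pass a folio plus a ref count to try_grab_folio() and get an int error back, and GUP-fast switches to try_grab_folio_fast(), which runs locklessly and therefore reports failure by returning a NULL folio (its definition is not shown in this diff). A minimal sketch of the two calling conventions, assuming the signatures visible in these hunks; the extra true argument threaded through the gup_hugepd() calls above presumably marks the fast-path callers:

	/* Slow path: page table lock held, the page is stable. */
	int err = try_grab_folio(page_folio(page), 1, gup_flags);
	if (err)
		goto unmap;

	/* GUP-fast: lockless, so failure is a NULL folio rather than an errno. */
	struct folio *folio = try_grab_folio_fast(page, refs, flags);
	if (!folio)
		return 0;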
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 89932fd0f62e..2120f7478e55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
@@ -3009,30 +3009,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
if (new_order >= folio_order(folio))
return -EINVAL;
- /* Cannot split anonymous THP to order-1 */
- if (new_order == 1 && folio_test_anon(folio)) {
- VM_WARN_ONCE(1, "Cannot split to order-1 folio");
- return -EINVAL;
- }
-
- if (new_order) {
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio))
+ if (folio_test_anon(folio)) {
+ /* order-1 is not supported for anonymous THP. */
+ if (new_order == 1) {
+ VM_WARN_ONCE(1, "Cannot split to order-1 folio");
return -EINVAL;
+ }
+ } else if (new_order) {
/* Split shmem folio to non-zero order not supported */
if (shmem_mapping(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split shmem folio to non-0 order");
return -EINVAL;
}
- /* No split if the file system does not support large folio */
- if (!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * No split if the file system does not support large folio.
+ * Note that we might still have THPs in such mappings due to
+ * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+ * does not actually support large folios properly.
+ */
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ !mapping_large_folio_support(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split file folio to non-0 order");
return -EINVAL;
}
}
+ /* Only swapping a whole PMD-mapped folio is supported */
+ if (folio_test_swapcache(folio) && new_order)
+ return -EINVAL;
is_hzp = is_huge_zero_folio(folio);
if (is_hzp) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f35abff8be60..43e1af868cfd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
* folio appears as just a compound page. Otherwise, wait until after
* allocating vmemmap to clear the flag.
*
- * A reference is held on the folio, except in the case of demote.
- *
* Must be called with hugetlb lock held.
*/
-static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
- bool adjust_surplus,
- bool demote)
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
+ bool adjust_surplus)
{
int nid = folio_nid(folio);
@@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
list_del(&folio->lru);
if (folio_test_hugetlb_freed(folio)) {
+ folio_clear_hugetlb_freed(folio);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
}
@@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
if (!folio_test_hugetlb_vmemmap_optimized(folio))
__folio_clear_hugetlb(folio);
- /*
- * In the case of demote we do not ref count the page as it will soon
- * be turned into a page of smaller size.
- */
- if (!demote)
- folio_ref_unfreeze(folio, 1);
-
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
}
-static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
- bool adjust_surplus)
-{
- __remove_hugetlb_folio(h, folio, adjust_surplus, false);
-}
-
-static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
- bool adjust_surplus)
-{
- __remove_hugetlb_folio(h, folio, adjust_surplus, true);
-}
-
static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
- int zeroed;
int nid = folio_nid(folio);
VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
@@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
*/
folio_set_hugetlb_vmemmap_optimized(folio);
- /*
- * This folio is about to be managed by the hugetlb allocator and
- * should have no users. Drop our reference, and check for others
- * just in case.
- */
- zeroed = folio_put_testzero(folio);
- if (unlikely(!zeroed))
- /*
- * It is VERY unlikely soneone else has taken a ref
- * on the folio. In this case, we simply return as
- * free_huge_folio() will be called when this other ref
- * is dropped.
- */
- return;
-
arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
}
@@ -1763,13 +1726,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
- * Move PageHWPoison flag from head page to the raw error pages,
- * which makes any healthy subpages reusable.
- */
- if (unlikely(folio_test_hwpoison(folio)))
- folio_clear_hugetlb_hwpoison(folio);
-
- /*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
*/
@@ -1780,6 +1736,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(folio_test_hwpoison(folio)))
+ folio_clear_hugetlb_hwpoison(folio);
+
+ folio_ref_unfreeze(folio, 1);
+
+ /*
* Non-gigantic pages demoted from CMA allocated gigantic pages
* need to be given back to CMA in free_gigantic_folio.
*/
@@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
nid = numa_mem_id();
retry:
folio = __folio_alloc(gfp_mask, order, nid, nmask);
+ /* Ensure hugetlb folio won't have large_rmappable flag set. */
+ if (folio)
+ folio_clear_large_rmappable(folio);
if (folio && !folio_ref_freeze(folio, 1)) {
folio_put(folio);
@@ -3079,11 +3047,8 @@ retry:
free_new:
spin_unlock_irq(&hugetlb_lock);
- if (new_folio) {
- /* Folio has a zero ref count, but needs a ref to be freed */
- folio_ref_unfreeze(new_folio, 1);
+ if (new_folio)
update_and_free_hugetlb_folio(h, new_folio, false);
- }
return ret;
}
@@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
- remove_hugetlb_folio_for_demote(h, folio, false);
+ remove_hugetlb_folio(h, folio, false);
spin_unlock_irq(&hugetlb_lock);
/*
@@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
if (rc) {
/* Allocation of vmemmmap failed, we can not demote folio */
spin_lock_irq(&hugetlb_lock);
- folio_ref_unfreeze(folio, 1);
add_hugetlb_folio(h, folio, false);
return rc;
}
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index b9a55322e52c..8193906515c6 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
if (!folio_test_hugetlb_vmemmap_optimized(folio))
return 0;
@@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
*/
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
{
+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+ synchronize_rcu();
+
return __hugetlb_vmemmap_restore_folio(h, folio, 0);
}
@@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
long restored = 0;
long ret = 0;
+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+ synchronize_rcu();
+
list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
if (folio_test_hugetlb_vmemmap_optimized(folio)) {
ret = __hugetlb_vmemmap_restore_folio(h, folio,
@@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
if (!vmemmap_should_optimize_folio(h, folio))
return ret;
@@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
{
LIST_HEAD(vmemmap_pages);
+ /* avoid writes from page_ref_add_unless() while folding vmemmap */
+ synchronize_rcu();
+
__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
free_vmemmap_page_list(&vmemmap_pages);
}
@@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
flush_tlb_all();
+ /* avoid writes from page_ref_add_unless() while folding vmemmap */
+ synchronize_rcu();
+
list_for_each_entry(folio, folio_list, lru) {
int ret;
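The synchronize_rcu() calls pair with the new VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio) assertions: the idea is that speculative PFN walkers may still take references through page_ref_add_unless() under RCU, so waiting out a grace period before folding or unfolding the vmemmap ensures none of them can write to a refcount word that lives in the pages being remapped.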
diff --git a/mm/internal.h b/mm/internal.h
index b2c75b12014e..cc2c5e07fad3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -588,7 +588,6 @@ extern void __putback_isolated_page(struct page *page, unsigned int order,
extern void memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order);
-extern void kernel_init_pages(struct page *page, int numpages);
/*
* This will have no effect, other than possibly generating a warning, if the
@@ -1183,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page);
/*
* mm/gup.c
*/
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
-int __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_folio(struct folio *folio, int refs,
+ unsigned int flags);
/*
* mm/huge_memory.c
@@ -1436,11 +1435,6 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
int priority);
#ifdef CONFIG_64BIT
-/* VM is sealed, in vm_flags */
-#define VM_SEALED _BITUL(63)
-#endif
-
-#ifdef CONFIG_64BIT
static inline int can_do_mseal(unsigned long flags)
{
if (flags)
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index e7c9a4dc89f8..85e7c6b4575c 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -532,7 +532,7 @@ void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip)
return;
/* Unpoison the object and save alloc info for non-kmalloc() allocations. */
- unpoison_slab_object(slab->slab_cache, ptr, size, flags);
+ unpoison_slab_object(slab->slab_cache, ptr, flags, false);
/* Poison the redzone and save alloc info for kmalloc() allocations. */
if (is_kmalloc_cache(slab->slab_cache))
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 774a97e6e2da..aab471791bd9 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2000,9 +2000,9 @@ out_unlock:
if (!is_shmem) {
filemap_nr_thps_inc(mapping);
/*
- * Paired with smp_mb() in do_dentry_open() to ensure
- * i_writecount is up to date and the update to nr_thps is
- * visible. Ensures the page cache will be truncated if the
+ * Paired with the fence in do_dentry_open() -> get_write_access()
+ * to ensure i_writecount is up to date and the update to nr_thps
+ * is visible. Ensures the page cache will be truncated if the
* file is opened writable.
*/
smp_mb();
@@ -2190,8 +2190,8 @@ rollback:
if (!is_shmem && result == SCAN_COPY_MC) {
filemap_nr_thps_dec(mapping);
/*
- * Paired with smp_mb() in do_dentry_open() to
- * ensure the update to nr_thps is visible.
+ * Paired with the fence in do_dentry_open() -> get_write_access()
+ * to ensure the update to nr_thps is visible.
*/
smp_mb();
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 08e9806b1cf9..e81fb68f7f88 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -754,7 +754,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
/* calculate lose page */
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- if (nid == NUMA_NO_NODE)
+ if (!numa_valid_node(nid))
nr_pages += end_pfn - start_pfn;
}
@@ -1061,7 +1061,7 @@ static bool should_skip_region(struct memblock_type *type,
return false;
/* only memory regions are associated with nodes, check it */
- if (nid != NUMA_NO_NODE && nid != m_nid)
+ if (numa_valid_node(nid) && nid != m_nid)
return true;
/* skip hotpluggable memory regions if needed */
@@ -1118,10 +1118,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
int idx_a = *idx & 0xffffffff;
int idx_b = *idx >> 32;
- if (WARN_ONCE(nid == MAX_NUMNODES,
- "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
for (; idx_a < type_a->cnt; idx_a++) {
struct memblock_region *m = &type_a->regions[idx_a];
@@ -1215,9 +1211,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
int idx_a = *idx & 0xffffffff;
int idx_b = *idx >> 32;
- if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
if (*idx == (u64)ULLONG_MAX) {
idx_a = type_a->cnt - 1;
if (type_b != NULL)
@@ -1303,7 +1296,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
continue;
- if (nid == MAX_NUMNODES || nid == r_nid)
+ if (!numa_valid_node(nid) || nid == r_nid)
break;
}
if (*idx >= type->cnt) {
@@ -1339,10 +1332,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
int start_rgn, end_rgn;
int i, ret;
- if (WARN_ONCE(nid == MAX_NUMNODES,
- "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
if (ret)
return ret;
@@ -1452,9 +1441,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
- if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
@@ -1467,7 +1453,7 @@ again:
if (found && !memblock_reserve(found, size))
goto done;
- if (nid != NUMA_NO_NODE && !exact_nid) {
+ if (numa_valid_node(nid) && !exact_nid) {
found = memblock_find_in_range_node(size, align, start,
end, NUMA_NO_NODE,
flags);
@@ -1987,7 +1973,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
end = base + size - 1;
flags = rgn->flags;
#ifdef CONFIG_NUMA
- if (memblock_get_region_node(rgn) != MAX_NUMNODES)
+ if (numa_valid_node(memblock_get_region_node(rgn)))
snprintf(nid_buf, sizeof(nid_buf), " on node %d",
memblock_get_region_node(rgn));
#endif
@@ -2181,7 +2167,7 @@ static void __init memmap_init_reserved_pages(void)
start = region->base;
end = start + region->size;
- if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES)
+ if (!numa_valid_node(nid))
nid = early_pfn_to_nid(PFN_DOWN(start));
reserve_bootmem_region(start, end, nid);
@@ -2272,7 +2258,7 @@ static int memblock_debug_show(struct seq_file *m, void *private)
seq_printf(m, "%4d: ", i);
seq_printf(m, "%pa..%pa ", &reg->base, &end);
- if (nid != MAX_NUMNODES)
+ if (numa_valid_node(nid))
seq_printf(m, "%4d ", nid);
else
seq_printf(m, "%4c ", 'x');
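Every memblock conversion above funnels through a single predicate; a hedged sketch of numa_valid_node(), assuming the definition this series introduces alongside NUMA_NO_NODE:

	/* Sketch: a nid is usable only within [0, MAX_NUMNODES). */
	static inline bool numa_valid_node(int nid)
	{
		return nid >= 0 && nid < MAX_NUMNODES;
	}

Since both NUMA_NO_NODE (-1) and MAX_NUMNODES fail this test, the old deprecation WARNs and the explicit rewrites to NUMA_NO_NODE become unnecessary.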
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36793e509f47..8f2f1bb18c9c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7745,8 +7745,7 @@ void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
* @new: Replacement folio.
*
* Charge @new as a replacement folio for @old. @old will
- * be uncharged upon free. This is only used by the page cache
- * (in replace_page_cache_folio()).
+ * be uncharged upon free.
*
* Both folios must be locked, @new->mapping must be set up.
*/
@@ -7824,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
/* Transfer the charge and the css ref */
commit_charge(new, memcg);
- /*
- * If the old folio is a large folio and is in the split queue, it needs
- * to be removed from the split queue now, in case getting an incorrect
- * split queue in destroy_large_folio() after the memcg of the old folio
- * is cleared.
- *
- * In addition, the old folio is about to be freed after migration, so
- * removing from the split queue a bit earlier seems reasonable.
- */
- if (folio_test_large(old) && folio_test_large_rmappable(old))
- folio_undo_large_rmappable(old);
old->memcg_data = 0;
}
diff --git a/mm/memory.c b/mm/memory.c
index 0f47a533014e..d10e616d7389 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1507,12 +1507,6 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb,
if (unlikely(folio_mapcount(folio) < 0))
print_bad_pte(vma, addr, ptent, page);
}
-
- if (want_init_mlocked_on_free() && folio_test_mlocked(folio) &&
- !delay_rmap && folio_test_anon(folio)) {
- kernel_init_pages(page, folio_nr_pages(folio));
- }
-
if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) {
*force_flush = true;
*force_break = true;
@@ -4614,8 +4608,9 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
return ret;
- if (page != &folio->page || folio_order(folio) != HPAGE_PMD_ORDER)
+ if (folio_order(folio) != HPAGE_PMD_ORDER)
return ret;
+ page = &folio->page;
/*
* Just backoff if any subpage of a THP is corrupted otherwise
@@ -5106,10 +5101,16 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
bool ignore_writable, bool pte_write_upgrade)
{
int nr = pte_pfn(fault_pte) - folio_pfn(folio);
- unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start);
- unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end);
- pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE;
- unsigned long addr;
+ unsigned long start, end, addr = vmf->address;
+ unsigned long addr_start = addr - (nr << PAGE_SHIFT);
+ unsigned long pt_start = ALIGN_DOWN(addr, PMD_SIZE);
+ pte_t *start_ptep;
+
+ /* Stay within the VMA and within the page table. */
+ start = max3(addr_start, pt_start, vma->vm_start);
+ end = min3(addr_start + folio_size(folio), pt_start + PMD_SIZE,
+ vma->vm_end);
+ start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT);
/* Restore all PTEs' mapping of the large folio */
for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) {
diff --git a/mm/migrate.c b/mm/migrate.c
index dd04f578c19c..a8c6f466e33a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_ref_count(folio) != expected_count)
return -EAGAIN;
+ /* Take off deferred split queue while frozen and memcg set */
+ if (folio_test_large(folio) &&
+ folio_test_large_rmappable(folio)) {
+ if (!folio_ref_freeze(folio, expected_count))
+ return -EAGAIN;
+ folio_undo_large_rmappable(folio);
+ folio_ref_unfreeze(folio, expected_count);
+ }
+
/* No turning back from here */
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
@@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping,
return -EAGAIN;
}
+ /* Take off deferred split queue while frozen and memcg set */
+ if (folio_test_large(folio) && folio_test_large_rmappable(folio))
+ folio_undo_large_rmappable(folio);
+
/*
* Now we know that no one else is looking at the folio:
* no turning back from here.
@@ -1654,7 +1667,16 @@ static int migrate_pages_batch(struct list_head *from,
/*
* The rare folio on the deferred split list should
- * be split now. It should not count as a failure.
+	 * be split now. It should not count as a failure,
+	 * but we increment nr_failed because, without doing so,
+	 * migrate_pages() may report success with (split but
+	 * unmigrated) pages still on its fromlist, whereas it
+	 * always reports success when its fromlist is empty.
+	 * stats->nr_thp_failed should be increased too,
+	 * otherwise the stats become inconsistent when
+	 * migrate_pages_batch() is called via migrate_pages()
+	 * with MIGRATE_SYNC and MIGRATE_ASYNC.
+ *
* Only check it without removing it from the list.
* Since the folio can be on deferred_split_scan()
* local list and removing it can cause the local list
@@ -1669,6 +1691,8 @@ static int migrate_pages_batch(struct list_head *from,
if (nr_pages > 2 &&
!list_empty(&folio->_deferred_list)) {
if (try_split_folio(folio, split_folios) == 0) {
+ nr_failed++;
+ stats->nr_thp_failed += is_thp;
stats->nr_thp_split += is_thp;
stats->nr_split++;
continue;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f72b852bd5b8..3ec04933f7fd 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2523,9 +2523,6 @@ EXPORT_SYMBOL(init_on_alloc);
DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
EXPORT_SYMBOL(init_on_free);
-DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
-EXPORT_SYMBOL(init_mlocked_on_free);
-
static bool _init_on_alloc_enabled_early __read_mostly
= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
@@ -2543,14 +2540,6 @@ static int __init early_init_on_free(char *buf)
}
early_param("init_on_free", early_init_on_free);
-static bool _init_mlocked_on_free_enabled_early __read_mostly
- = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON);
-static int __init early_init_mlocked_on_free(char *buf)
-{
- return kstrtobool(buf, &_init_mlocked_on_free_enabled_early);
-}
-early_param("init_mlocked_on_free", early_init_mlocked_on_free);
-
DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);
/*
@@ -2578,21 +2567,12 @@ static void __init mem_debugging_and_hardening_init(void)
}
#endif
- if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early ||
- _init_mlocked_on_free_enabled_early) &&
+ if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
page_poisoning_requested) {
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
- "will take precedence over init_on_alloc, init_on_free "
- "and init_mlocked_on_free\n");
+ "will take precedence over init_on_alloc and init_on_free\n");
_init_on_alloc_enabled_early = false;
_init_on_free_enabled_early = false;
- _init_mlocked_on_free_enabled_early = false;
- }
-
- if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) {
- pr_info("mem auto-init: init_on_free is on, "
- "will take precedence over init_mlocked_on_free\n");
- _init_mlocked_on_free_enabled_early = false;
}
if (_init_on_alloc_enabled_early) {
@@ -2609,17 +2589,9 @@ static void __init mem_debugging_and_hardening_init(void)
static_branch_disable(&init_on_free);
}
- if (_init_mlocked_on_free_enabled_early) {
- want_check_pages = true;
- static_branch_enable(&init_mlocked_on_free);
- } else {
- static_branch_disable(&init_mlocked_on_free);
- }
-
- if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early ||
- _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early))
- pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and "
- "init_mlocked_on_free are disabled when running KMSAN\n");
+ if (IS_ENABLED(CONFIG_KMSAN) &&
+ (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
+ pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled()) {
@@ -2658,10 +2630,9 @@ static void __init report_meminit(void)
else
stack = "off";
- pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n",
+ pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
- want_init_on_free() ? "on" : "off",
- want_init_mlocked_on_free() ? "on" : "off");
+ want_init_on_free() ? "on" : "off");
if (want_init_on_free())
pr_info("mem auto-init: clearing system memory may take some time...\n");
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 12c9297ed4a7..8a1c92090129 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -415,13 +415,20 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
else
bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
- if (bg_thresh >= thresh)
- bg_thresh = thresh / 2;
tsk = current;
if (rt_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
}
+ /*
+	 * Dirty throttling logic assumes the limits in page units fit into
+	 * 32 bits. This caps the dirty limits at 16TB, which is hopefully enough.
+ */
+ if (thresh > UINT_MAX)
+ thresh = UINT_MAX;
+	/* This makes sure bg_thresh is within 32 bits as well */
+ if (bg_thresh >= thresh)
+ bg_thresh = thresh / 2;
dtc->thresh = thresh;
dtc->bg_thresh = bg_thresh;
@@ -471,7 +478,11 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat)
if (rt_task(tsk))
dirty += dirty / 4;
- return dirty;
+ /*
+ * Dirty throttling logic assumes the limits in page units fit into
+	 * 32 bits. This caps the dirty limits at 16TB, which is hopefully enough.
+ */
+ return min_t(unsigned long, dirty, UINT_MAX);
}
/**
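The 16TB figure in the new comments follows from the units: the limits are kept in pages, so the largest representable limit is UINT_MAX pages, and with a 4KiB PAGE_SIZE that is roughly 2^32 * 2^12 = 2^44 bytes = 16TiB (larger page sizes raise the ceiling proportionally).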
@@ -508,10 +519,17 @@ static int dirty_background_bytes_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
+ unsigned long old_bytes = dirty_background_bytes;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
- if (ret == 0 && write)
+ if (ret == 0 && write) {
+ if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) >
+ UINT_MAX) {
+ dirty_background_bytes = old_bytes;
+ return -ERANGE;
+ }
dirty_background_ratio = 0;
+ }
return ret;
}
@@ -537,6 +555,10 @@ static int dirty_bytes_handler(struct ctl_table *table, int write,
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+ if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) {
+ vm_dirty_bytes = old_bytes;
+ return -ERANGE;
+ }
writeback_set_ratelimit();
vm_dirty_ratio = 0;
}
@@ -1660,7 +1682,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
*/
dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh);
dtc->wb_bg_thresh = dtc->thresh ?
- div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+ div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
/*
* In order to avoid the stacked BDI deadlock we need
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 222299b5c0e6..9ecf99190ea2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -504,10 +504,15 @@ out:
static inline unsigned int order_to_pindex(int migratetype, int order)
{
+ bool __maybe_unused movable;
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (order > PAGE_ALLOC_COSTLY_ORDER) {
VM_BUG_ON(order != HPAGE_PMD_ORDER);
- return NR_LOWORDER_PCP_LISTS;
+
+ movable = migratetype == MIGRATE_MOVABLE;
+
+ return NR_LOWORDER_PCP_LISTS + movable;
}
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
@@ -521,7 +526,7 @@ static inline int pindex_to_order(unsigned int pindex)
int order = pindex / MIGRATE_PCPTYPES;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pindex == NR_LOWORDER_PCP_LISTS)
+ if (pindex >= NR_LOWORDER_PCP_LISTS)
order = HPAGE_PMD_ORDER;
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
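Concretely, THP-order (HPAGE_PMD_ORDER) movable pages now get their own PCP list at pindex NR_LOWORDER_PCP_LISTS + 1 while all other THP-order pages stay at NR_LOWORDER_PCP_LISTS, which is why pindex_to_order() switches from == to >= when mapping those indices back to HPAGE_PMD_ORDER.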
@@ -1016,7 +1021,7 @@ static inline bool should_skip_kasan_poison(struct page *page)
return page_kasan_tag(page) == KASAN_TAG_KERNEL;
}
-void kernel_init_pages(struct page *page, int numpages)
+static void kernel_init_pages(struct page *page, int numpages)
{
int i;
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 4169576bed72..509c6ef8de40 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -73,6 +73,9 @@ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt)
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+ if (!page_ext)
+ return;
+
BUG_ON(PageSlab(page));
anon = PageAnon(page);
@@ -110,6 +113,9 @@ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt,
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+ if (!page_ext)
+ return;
+
BUG_ON(PageSlab(page));
anon = PageAnon(page);
@@ -140,7 +146,10 @@ void __page_table_check_zero(struct page *page, unsigned int order)
BUG_ON(PageSlab(page));
page_ext = page_ext_get(page);
- BUG_ON(!page_ext);
+
+ if (!page_ext)
+ return;
+
for (i = 0; i < (1ul << order); i++) {
struct page_table_check *ptc = get_page_table_check(page_ext);
diff --git a/mm/readahead.c b/mm/readahead.c
index c1b23989d9ca..817b2a352d78 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
limit = min(limit, index + ra->size - 1);
- if (new_order < MAX_PAGECACHE_ORDER) {
+ if (new_order < MAX_PAGECACHE_ORDER)
new_order += 2;
- new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
- new_order = min_t(unsigned int, new_order, ilog2(ra->size));
- }
+
+ new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+ new_order = min_t(unsigned int, new_order, ilog2(ra->size));
/* See comment in page_cache_ra_unbounded() */
nofs = memalloc_nofs_save();
diff --git a/mm/shmem.c b/mm/shmem.c
index f5d60436b604..831b52dfd56e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- struct mm_struct *mm, unsigned long vm_flags)
+static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
+ bool shmem_huge_force, struct mm_struct *mm,
+ unsigned long vm_flags)
{
loff_t i_size;
@@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
}
}
+bool shmem_is_huge(struct inode *inode, pgoff_t index,
+ bool shmem_huge_force, struct mm_struct *mm,
+ unsigned long vm_flags)
+{
+ if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
+ return false;
+
+ return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags);
+}
+
#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
@@ -1786,7 +1797,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
xa_lock_irq(&swap_mapping->i_pages);
error = shmem_replace_entry(swap_mapping, swap_index, old, new);
if (!error) {
- mem_cgroup_migrate(old, new);
+ mem_cgroup_replace_folio(old, new);
__lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1);
__lruvec_stat_mod_folio(new, NR_SHMEM, 1);
__lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1);
@@ -3166,10 +3177,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
struct folio *folio;
/*
- * Good, the fallocate(2) manpage permits EINTR: we may have
- * been interrupted because we are using up too much memory.
+ * Check for fatal signal so that we abort early in OOM
+ * situations. We don't want to abort in case of non-fatal
+ * signals as large fallocate can take noticeable time and
+ * e.g. periodic timers may result in fallocate constantly
+ * restarting.
*/
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
error = -EINTR;
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM;
@@ -3903,14 +3917,14 @@ static const struct constant_table shmem_param_enums_huge[] = {
};
const struct fs_parameter_spec shmem_fs_parameters[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge),
fsparam_u32oct("mode", Opt_mode),
fsparam_string("mpol", Opt_mpol),
fsparam_string("nr_blocks", Opt_nr_blocks),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("size", Opt_size),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
fsparam_flag ("inode32", Opt_inode32),
fsparam_flag ("inode64", Opt_inode64),
fsparam_flag ("noswap", Opt_noswap),
@@ -3970,9 +3984,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->mode = result.uint_32 & 07777;
break;
case Opt_uid:
- kuid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(kuid))
- goto bad_value;
+ kuid = result.uid;
/*
* The requested uid must be representable in the
@@ -3984,9 +3996,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->uid = kuid;
break;
case Opt_gid:
- kgid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(kgid))
- goto bad_value;
+ kgid = result.gid;
/*
* The requested gid must be representable in the
diff --git a/mm/slub.c b/mm/slub.c
index 1373ac365a46..4927edec6a8c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3902,7 +3902,6 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
- struct slabobj_ext *obj_exts;
bool kasan_init = init;
size_t i;
gfp_t init_flags = flags & gfp_allowed_mask;
@@ -3945,9 +3944,11 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
if (need_slab_obj_ext()) {
+ struct slabobj_ext *obj_exts;
+
obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]);
-#ifdef CONFIG_MEM_ALLOC_PROFILING
/*
* Currently obj_exts is used only for allocation profiling.
* If other users appear then mem_alloc_profiling_enabled()
@@ -3955,8 +3956,8 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
*/
if (likely(obj_exts))
alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
-#endif
}
+#endif
}
return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
diff --git a/mm/util.c b/mm/util.c
index 6c3e6710e4de..fe723241b66f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -139,14 +139,14 @@ EXPORT_SYMBOL(kmemdup_noprof);
* kmemdup_array - duplicate a given array.
*
* @src: array to duplicate.
- * @element_size: size of each element of array.
* @count: number of elements to duplicate from array.
+ * @element_size: size of each element of array.
* @gfp: GFP mask to use.
*
* Return: duplicated array of @src or %NULL in case of error,
* result is physically contiguous. Use kfree() to free.
*/
-void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp)
{
return kmemdup(src, size_mul(element_size, count), gfp);
}
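With the corrected parameter order a call site reads as "duplicate count elements of this size"; a hedged usage sketch (struct and variable names are illustrative only):

	struct point { int x, y; };

	/* Duplicate n elements of pts[]; release with kfree() when done. */
	struct point *dup = kmemdup_array(pts, n, sizeof(*pts), GFP_KERNEL);
	if (!dup)
		return -ENOMEM;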
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 45e1506d58c3..e34ea860153f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2498,6 +2498,7 @@ struct vmap_block {
struct list_head free_list;
struct rcu_head rcu_head;
struct list_head purge;
+ unsigned int cpu;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -2542,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
static struct xarray *
addr_to_vb_xa(unsigned long addr)
{
- int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();
+ int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
+
+ /*
+	 * Note that nr_cpu_ids - 1 is the highest possible set
+	 * bit, so cpu_possible() always holds for that index and
+	 * cpumask_next() is never invoked on it.
+ */
+ if (!cpu_possible(index))
+ index = cpumask_next(index, cpu_possible_mask);
return &per_cpu(vmap_block_queue, index).vmap_blocks;
}
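As an illustrative example (values not taken from the patch): with cpu_possible_mask = {0, 2, 3}, nr_cpu_ids is 4, so an address hashing to index 1 is stepped forward to CPU 2. The old modulus of num_possible_cpus() (3 here) could produce an index naming a CPU that is not possible at all, i.e. a per-CPU xarray never used by anything else.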
@@ -2625,8 +2634,15 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
free_vmap_area(va);
return ERR_PTR(err);
}
-
- vbq = raw_cpu_ptr(&vmap_block_queue);
+ /*
+	 * list_add_tail_rcu can happen on a core other than
+	 * vb->cpu due to task migration, which is safe as
+	 * list_add_tail_rcu ensures the list's integrity
+	 * together with list_for_each_rcu on the read side.
+ */
+ vb->cpu = raw_smp_processor_id();
+ vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
@@ -2654,9 +2670,10 @@ static void free_vmap_block(struct vmap_block *vb)
}
static bool purge_fragmented_block(struct vmap_block *vb,
- struct vmap_block_queue *vbq, struct list_head *purge_list,
- bool force_purge)
+ struct list_head *purge_list, bool force_purge)
{
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu);
+
if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
vb->dirty == VMAP_BBMAP_BITS)
return false;
@@ -2704,7 +2721,7 @@ static void purge_fragmented_blocks(int cpu)
continue;
spin_lock(&vb->lock);
- purge_fragmented_block(vb, vbq, &purge, true);
+ purge_fragmented_block(vb, &purge, true);
spin_unlock(&vb->lock);
}
rcu_read_unlock();
@@ -2841,7 +2858,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
* not purgeable, check whether there is dirty
* space to be flushed.
*/
- if (!purge_fragmented_block(vb, vbq, &purge_list, false) &&
+ if (!purge_fragmented_block(vb, &purge_list, false) &&
vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;
diff --git a/mm/workingset.c b/mm/workingset.c
index c22adb93622a..a2b28e356e68 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
* @file: whether the corresponding folio is from the file lru.
* @workingset: where the workingset value unpacked from shadow should
* be stored.
+ * @flush: whether to flush cgroup rstat.
*
* Return: true if the shadow is for a recently evicted folio; false otherwise.
*/
-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
/*
* Flush stats (and potentially sleep) outside the RCU read section.
+ *
+ * Note that workingset_test_recent() itself might be called in RCU read
+ * section (for e.g, in cachestat) - these callers need to skip flushing
+ * stats (via the flush argument).
+ *
* XXX: With per-memcg flushing and thresholding, is ratelimiting
* still needed here?
*/
- mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+ if (flush)
+ mem_cgroup_flush_stats_ratelimited(eviction_memcg);
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
- if (!workingset_test_recent(shadow, file, &workingset))
+ if (!workingset_test_recent(shadow, file, &workingset, true))
return;
folio_set_active(folio);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index ac74f6ead62d..8f6dd2c6ee41 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
+#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -132,6 +133,29 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_vlan_id_valid() - check if the vlan id uses a valid batman-adv encoding
+ * @vid: the VLAN identifier
+ *
+ * Return: true when no VLAN is set or when the VLAN is in the correct
+ * range, false otherwise
+ */
+static bool batadv_vlan_id_valid(unsigned short vid)
+{
+ unsigned short non_vlan = vid & ~(BATADV_VLAN_HAS_TAG | VLAN_VID_MASK);
+
+ if (vid == 0)
+ return true;
+
+ if (!(vid & BATADV_VLAN_HAS_TAG))
+ return false;
+
+ if (non_vlan)
+ return false;
+
+ return true;
+}
+
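Illustrative values under this encoding (assuming BATADV_VLAN_HAS_TAG is bit 15 and VLAN_VID_MASK covers bits 0-11): vid 0 passes (untagged), 0x8005 passes (tagged VLAN 5), 0x0005 fails (VLAN bits without the tag flag), and 0x9005 fails (bit 12 lies outside both masks).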
+/**
* batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan
* object
* @orig_node: the originator serving the VLAN
@@ -149,6 +173,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
{
struct batadv_orig_node_vlan *vlan;
+ if (!batadv_vlan_id_valid(vid))
+ return NULL;
+
spin_lock_bh(&orig_node->vlan_list_lock);
/* first look if an object for this vid already exists */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b21ff3c36b07..2243cec18ecc 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -209,6 +209,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
}
/**
+ * batadv_tt_local_entry_free_rcu() - free the tt_local_entry
+ * @rcu: rcu pointer of the tt_local_entry
+ */
+static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_tt_local_entry *tt_local_entry;
+
+ tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
+ common.rcu);
+
+ kmem_cache_free(batadv_tl_cache, tt_local_entry);
+}
+
+/**
* batadv_tt_local_entry_release() - release tt_local_entry from lists and queue
* for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -222,7 +236,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
batadv_softif_vlan_put(tt_local_entry->vlan);
- kfree_rcu(tt_local_entry, common.rcu);
+ call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
}
/**
@@ -241,6 +255,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
}
/**
+ * batadv_tt_global_entry_free_rcu() - free the tt_global_entry
+ * @rcu: rcu pointer of the tt_global_entry
+ */
+static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_tt_global_entry *tt_global_entry;
+
+ tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
+ common.rcu);
+
+ kmem_cache_free(batadv_tg_cache, tt_global_entry);
+}
+
+/**
* batadv_tt_global_entry_release() - release tt_global_entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -254,7 +282,7 @@ void batadv_tt_global_entry_release(struct kref *ref)
batadv_tt_global_del_orig_list(tt_global_entry);
- kfree_rcu(tt_global_entry, common.rcu);
+ call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
}
/**
@@ -380,6 +408,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry
+ * @rcu: rcu pointer of the orig_entry
+ */
+static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
+
+ kmem_cache_free(batadv_tt_orig_cache, orig_entry);
+}
+
+/**
* batadv_tt_orig_list_entry_release() - release tt orig entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the tt orig entry
@@ -392,7 +433,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
refcount);
batadv_orig_node_put(orig_entry->orig_node);
- kfree_rcu(orig_entry, rcu);
+ call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
}
/**
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 0c76dcde5361..080053a85b4d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -899,8 +899,8 @@ static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
U16_MAX, GFP_ATOMIC);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
- u8 role, u16 handle)
+static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
{
struct hci_conn *conn;
@@ -1041,7 +1041,16 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
if (unlikely(handle < 0))
return ERR_PTR(-ECONNREFUSED);
- return hci_conn_add(hdev, type, dst, role, handle);
+ return __hci_conn_add(hdev, type, dst, role, handle);
+}
+
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
+{
+ if (handle > HCI_CONN_HANDLE_MAX)
+ return ERR_PTR(-EINVAL);
+
+ return __hci_conn_add(hdev, type, dst, role, handle);
}
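Note the asymmetry: hci_conn_add_unset() keeps calling the unchecked __hci_conn_add(), since its placeholder handles are deliberately allocated above the valid range (up to U16_MAX, per the IDA allocation above), while the exported hci_conn_add(), whose handles come from the controller, now rejects anything above HCI_CONN_HANDLE_MAX.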
static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index dd3b0f501018..c644b30977bd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -63,50 +63,6 @@ DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
-static int hci_scan_req(struct hci_request *req, unsigned long opt)
-{
- __u8 scan = opt;
-
- BT_DBG("%s %x", req->hdev->name, scan);
-
- /* Inquiry and Page scans */
- hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
- return 0;
-}
-
-static int hci_auth_req(struct hci_request *req, unsigned long opt)
-{
- __u8 auth = opt;
-
- BT_DBG("%s %x", req->hdev->name, auth);
-
- /* Authentication */
- hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
- return 0;
-}
-
-static int hci_encrypt_req(struct hci_request *req, unsigned long opt)
-{
- __u8 encrypt = opt;
-
- BT_DBG("%s %x", req->hdev->name, encrypt);
-
- /* Encryption */
- hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
- return 0;
-}
-
-static int hci_linkpol_req(struct hci_request *req, unsigned long opt)
-{
- __le16 policy = cpu_to_le16(opt);
-
- BT_DBG("%s %x", req->hdev->name, policy);
-
- /* Default link policy */
- hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
- return 0;
-}
-
/* Get HCI device by index.
* Device is held on return. */
struct hci_dev *hci_dev_get(int index)
@@ -735,6 +691,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
{
struct hci_dev *hdev;
struct hci_dev_req dr;
+ __le16 policy;
int err = 0;
if (copy_from_user(&dr, arg, sizeof(dr)))
@@ -761,8 +718,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
break;
case HCISETENCRYPT:
@@ -773,19 +730,23 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev,
+ HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
if (err)
break;
}
- err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
break;
case HCISETSCAN:
- err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
/* Ensure that the connectable and discoverable states
* get correctly modified as this was a non-mgmt change.
@@ -795,8 +756,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
break;
case HCISETLINKPOL:
- err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ policy = cpu_to_le16(dr.dev_opt);
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
+ 2, &policy,
+ HCI_CMD_TIMEOUT);
break;
case HCISETLINKMODE:
@@ -2751,7 +2715,11 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ cancel_work_sync(&hdev->rx_work);
+ cancel_work_sync(&hdev->cmd_work);
+ cancel_work_sync(&hdev->tx_work);
cancel_work_sync(&hdev->power_on);
+ cancel_work_sync(&hdev->error_reset);
hci_cmd_sync_clear(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a487f9df8145..93f7ac905cec 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -6311,6 +6311,13 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
+
+ if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+ &hdev->quirks)) {
+ info->primary_phy &= 0x1f;
+ info->secondary_phy &= 0x1f;
+ }
+
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
@@ -6660,6 +6667,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct bt_iso_qos *qos;
bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
+ u32 c_sdu_interval, p_sdu_interval;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6684,12 +6692,25 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
- /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
- qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
- qos->ucast.out.interval = qos->ucast.in.interval;
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 6, Part G
+ * page 3075:
+ * Transport_Latency_C_To_P = CIG_Sync_Delay + (FT_C_To_P) ×
+ * ISO_Interval + SDU_Interval_C_To_P
+ * ...
+ * SDU_Interval = (CIG_Sync_Delay + (FT) x ISO_Interval) -
+ * Transport_Latency
+ */
+ c_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->c_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->c_latency);
+ p_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->p_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->p_latency);
switch (conn->role) {
case HCI_ROLE_SLAVE:
+ qos->ucast.in.interval = c_sdu_interval;
+ qos->ucast.out.interval = p_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.in.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -6703,6 +6724,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos->ucast.out.phy = ev->p_phy;
break;
case HCI_ROLE_MASTER:
+ qos->ucast.in.interval = p_sdu_interval;
+ qos->ucast.out.interval = c_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.out.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
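As a worked example of the spec formula (illustrative numbers, not from the patch): with CIG_Sync_Delay = 5000 us, FT = 2 and ev->interval = 8 (eight 1.25 ms slots, i.e. ISO_Interval = 10000 us), a reported Transport_Latency of 20000 us yields SDU_Interval = 5000 + 2 * 10000 - 20000 = 5000 us.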
@@ -6893,6 +6916,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis = hci_conn_hash_lookup_handle(hdev, handle);
if (!bis) {
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_dbg(hdev, "ignore too large handle %u", handle);
+ continue;
+ }
bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE, handle);
if (IS_ERR(bis))
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index a8a7d2b36870..eea34e6a236f 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -280,6 +280,19 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
}
EXPORT_SYMBOL(__hci_cmd_sync_status);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ int err;
+
+ hci_req_sync_lock(hdev);
+ err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout);
+ hci_req_sync_unlock(hdev);
+
+ return err;
+}
+EXPORT_SYMBOL(hci_cmd_sync_status);
+
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index cc055b952ce6..398fb81f7a13 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -1356,8 +1356,7 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- if (pi->conn->hcon &&
- test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
+ if (test_bit(BT_SK_PA_SYNC, &pi->flags)) {
iso_conn_big_sync(sk);
sk->sk_state = BT_LISTEN;
} else {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index aed025734d04..c3c26bbb5dda 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6761,6 +6761,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
BT_DBG("chan %p, len %d", chan, skb->len);
+ l2cap_chan_lock(chan);
+
if (chan->state != BT_BOUND && chan->state != BT_CONNECTED)
goto drop;
@@ -6777,6 +6779,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
}
drop:
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
free_skb:
kfree_skb(skb);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 6db60946c627..ba437c6f6ee5 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
+	/* The sock is dead, so set chan->data to NULL to keep other tasks from
+	 * using a stale sock pointer.
+ */
+ l2cap_pi(sk)->chan->data = NULL;
/* Kill poor orphan */
l2cap_chan_put(l2cap_pi(sk)->chan);
@@ -1481,12 +1485,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct sock *sk = chan->data;
- struct l2cap_pinfo *pi = l2cap_pi(sk);
+ struct sock *sk;
+ struct l2cap_pinfo *pi;
int err;
- lock_sock(sk);
+ sk = chan->data;
+ if (!sk)
+ return -ENXIO;
+ pi = l2cap_pi(sk);
+ lock_sock(sk);
if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
err = -ENOMEM;
goto done;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index a6fb89fa6278..7e8a20f2fc42 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -30,10 +30,6 @@ MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
/* CAN_HDR: #bytes before can_frame data part */
#define J1939_CAN_HDR (offsetof(struct can_frame, data))
-/* CAN_FTR: #bytes beyond data part */
-#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
- sizeof(((struct can_frame *)0)->data))
-
/* lowest layer */
static void j1939_can_recv(struct sk_buff *iskb, void *data)
{
@@ -342,7 +338,7 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
memset(cf, 0, J1939_CAN_HDR);
/* make it a full can frame again */
- skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+ skb_put_zero(skb, 8 - dlc);
canid = CAN_EFF_FLAG |
(skcb->priority << 26) |
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fe3df23a2595..4be73de5033c 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1593,8 +1593,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
struct j1939_session *session;
const u8 *dat;
+ int len, ret;
pgn_t pgn;
- int len;
netdev_dbg(priv->ndev, "%s\n", __func__);
@@ -1653,7 +1653,22 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
session->tskey = priv->rx_tskey++;
j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
- WARN_ON_ONCE(j1939_session_activate(session));
+ ret = j1939_session_activate(session);
+ if (ret) {
+ /* Entering this scope indicates an issue with the J1939 bus.
+ * Possible scenarios include:
+		 *  - Time elapsed and a new session was initiated because
+		 *    another packet was sent correctly. This could be caused
+		 *    by an overly long interrupt, a debugger, or the task
+		 *    being preempted by another task.
+ * - The bus is receiving numerous erroneous packets, either
+ * from a malfunctioning device or during a test scenario.
+ */
+ netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n",
+ __func__, session, skcb.addr.sa, skcb.addr.da);
+ j1939_session_put(session);
+ return NULL;
+ }
return session;
}
@@ -1681,6 +1696,8 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
j1939_session_timers_cancel(session);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+ if (session->transmission)
+ j1939_session_deactivate_activate_next(session);
return -EBUSY;
}
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 1daf95e17d67..3a5bd1cd1e99 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -429,7 +429,10 @@ static int is_out(const struct crush_map *map,
/**
* crush_choose_firstn - choose numrep distinct items of given type
* @map: the crush_map
+ * @work: working space initialized by crush_init_workspace()
* @bucket: the bucket we are choose an item from
+ * @weight: weight vector (for map leaves)
+ * @weight_max: size of weight vector
* @x: crush input value
* @numrep: the number of items to choose
* @type: the type of item to choose
@@ -445,6 +448,7 @@ static int is_out(const struct crush_map *map,
* @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @parent_r: r value passed from the parent
+ * @choose_args: weights and ids for each known bucket
*/
static int crush_choose_firstn(const struct crush_map *map,
struct crush_work *work,
@@ -636,9 +640,8 @@ reject:
}
-/**
+/*
* crush_choose_indep: alternative breadth-first positionally stable mapping
- *
*/
static void crush_choose_indep(const struct crush_map *map,
struct crush_work *work,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index f263f7e91a21..ab66b599ac47 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work)
struct ceph_mon_client *monc =
container_of(work, struct ceph_mon_client, delayed_work.work);
- dout("monc delayed_work\n");
mutex_lock(&monc->mutex);
+ dout("%s mon%d\n", __func__, monc->cur_mon);
+	if (monc->cur_mon < 0)
+		goto out;
+
if (monc->hunting) {
dout("%s continuing hunt\n", __func__);
reopen_session(monc);
} else {
int is_auth = ceph_auth_is_authenticated(monc->auth);
+
+ dout("%s is_authed %d\n", __func__, is_auth);
if (ceph_con_keepalive_expired(&monc->con,
CEPH_MONC_PING_TIMEOUT)) {
dout("monc keepalive timeout\n");
@@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work)
}
}
__schedule_delayed(monc);
+
+out:
mutex_unlock(&monc->mutex);
}
@@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init);
void ceph_monc_stop(struct ceph_mon_client *monc)
{
dout("stop\n");
- cancel_delayed_work_sync(&monc->delayed_work);
mutex_lock(&monc->mutex);
__close_session(monc);
+ monc->hunting = false;
monc->cur_mon = -1;
mutex_unlock(&monc->mutex);
+ cancel_delayed_work_sync(&monc->delayed_work);
+
/*
* flush msgr queue before we destroy ourselves to ensure that:
* - any work that references our embedded con is finished.
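The reordering in ceph_monc_stop() is the classic shutdown sequence for a self-rearming worker: mark the client stopped under the lock first, then wait for the work item, so a run already in flight sees cur_mon < 0 and bails out instead of rescheduling itself. A hedged pthread sketch of the pattern (names invented, not the Ceph API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool stopped;
static int ticks;

static void *worker(void *arg)
{
    for (;;) {
        pthread_mutex_lock(&lock);
        if (stopped) {              /* the cur_mon < 0 check */
            pthread_mutex_unlock(&lock);
            return NULL;
        }
        ticks++;                    /* would "rearm" itself here */
        pthread_mutex_unlock(&lock);
    }
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, worker, NULL);
    pthread_mutex_lock(&lock);
    stopped = true;                 /* 1. mark stopped under the lock */
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);          /* 2. then wait, like
                                     *    cancel_delayed_work_sync() */
    printf("worker ran %d ticks\n", ticks);
    return 0;
}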
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e614cfd8e14a..e72dd78471a6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -416,15 +416,23 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- struct page *page = skb_frag_page(frag);
- u8 *vaddr = kmap(page);
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
if (copy > len)
copy = len;
- n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
- vaddr + skb_frag_off(frag) + offset - start,
- copy, data, to);
- kunmap(page);
+
+ n = 0;
+ skb_frag_foreach_page(frag,
+ skb_frag_off(frag) + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_local_page(p);
+ n += INDIRECT_CALL_1(cb, simple_copy_to_iter,
+ vaddr + p_off, p_len, data, to);
+ kunmap_local(vaddr);
+ }
+
offset += n;
if (n != copy)
goto short_copy;
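The new loop handles fragments that span several pages, mapping one page at a time with kmap_local_page() instead of assuming the whole fragment fits a single kmap(). The chunking arithmetic, extracted into a runnable userspace example (PAGE_SIZE assumed to be 4096; the skb helpers are stand-ins):

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
    unsigned int frag_off = 4090, copy = 20;    /* range crosses a page */
    unsigned int off = frag_off, left = copy, copied = 0;

    while (left) {
        unsigned int p_off = off & (PAGE_SIZE - 1);
        unsigned int p_len = PAGE_SIZE - p_off;

        if (p_len > left)
            p_len = left;
        printf("page %u: off=%u len=%u\n", off / PAGE_SIZE, p_off, p_len);
        copied += p_len;    /* kmap_local_page() + copy would go here */
        off += p_len;
        left -= p_len;
    }
    printf("copied %u of %u\n", copied, copy);
    return 0;
}

For the sample offsets this yields two chunks, (4090, 6) on the first page and (0, 14) on the second, exactly the (p_off, p_len) pairs skb_frag_foreach_page() would hand out.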
diff --git a/net/core/dev.c b/net/core/dev.c
index 4d4de9008f6f..2b4819b610b8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1226,9 +1226,9 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
- write_seqlock(&netdev_rename_lock);
+ write_seqlock_bh(&netdev_rename_lock);
err = dev_get_valid_name(net, dev, newname);
- write_sequnlock(&netdev_rename_lock);
+ write_sequnlock_bh(&netdev_rename_lock);
if (err < 0) {
up_write(&devnet_rename_sem);
@@ -1269,9 +1269,9 @@ rollback:
if (err >= 0) {
err = ret;
down_write(&devnet_rename_sem);
- write_seqlock(&netdev_rename_lock);
+ write_seqlock_bh(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
- write_sequnlock(&netdev_rename_lock);
+ write_sequnlock_bh(&netdev_rename_lock);
memcpy(oldname, newname, IFNAMSIZ);
WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
@@ -11419,9 +11419,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
if (new_name[0]) {
/* Rename the netdev to prepared name */
- write_seqlock(&netdev_rename_lock);
+ write_seqlock_bh(&netdev_rename_lock);
strscpy(dev->name, new_name, IFNAMSIZ);
- write_sequnlock(&netdev_rename_lock);
+ write_sequnlock_bh(&netdev_rename_lock);
}
/* Fixup kobjects */
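These hunks switch the netdev_rename_lock writers to the _bh variants because readers can run in softirq context; without disabling bottom halves, a softirq reader interrupting the writer on the same CPU would spin forever on an odd sequence count. A simplified single-writer seqcount sketch in userspace C11, with illustrative memory ordering only and no softirq analogue:

#include <stdatomic.h>
#include <string.h>
#include <stdio.h>

static atomic_uint seq;
static char name[16];

static void write_name(const char *n)
{
    atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* odd */
    strcpy(name, n);
    atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
}

static void read_name(char *out)
{
    unsigned int s;

    do {
        while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
            ;                       /* writer in progress: retry */
        strcpy(out, name);
    } while (atomic_load_explicit(&seq, memory_order_acquire) != s);
}

int main(void)
{
    char out[16];

    write_name("eth0");
    read_name(out);
    printf("%s\n", out);
    return 0;
}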
diff --git a/net/core/filter.c b/net/core/filter.c
index 2510464692af..9933851c685e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1665,6 +1665,11 @@ static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
static inline int __bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
+#ifdef CONFIG_DEBUG_NET
+ /* Avoid a splat in pskb_may_pull_reason() */
+ if (write_len > INT_MAX)
+ return -EINVAL;
+#endif
return skb_ensure_writable(skb, write_len);
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 4f7a61688d18..6a823ba906c6 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -693,11 +693,16 @@ EXPORT_SYMBOL_GPL(__put_net);
* get_net_ns - increment the refcount of the network namespace
* @ns: common namespace (net)
*
- * Returns the net's common namespace.
+ * Returns the net's common namespace or ERR_PTR() if ref is zero.
*/
struct ns_common *get_net_ns(struct ns_common *ns)
{
- return &get_net(container_of(ns, struct net, ns))->ns;
+ struct net *net;
+
+ net = maybe_get_net(container_of(ns, struct net, ns));
+ if (net)
+ return &net->ns;
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns);
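maybe_get_net() only takes a reference when the count is still non-zero, so a namespace that has already started dying cannot be resurrected. The "increment unless zero" loop it relies on, sketched with C11 atomics (refcount_inc_not_zero() is the real kernel primitive):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool refcount_inc_not_zero(atomic_int *ref)
{
    int old = atomic_load(ref);

    while (old != 0) {
        /* on failure, old is reloaded and the loop retries */
        if (atomic_compare_exchange_weak(ref, &old, old + 1))
            return true;    /* reference taken */
    }
    return false;           /* object already dying: caller gets ERR_PTR */
}

int main(void)
{
    atomic_int live = 2, dying = 0;

    printf("live:  %d\n", refcount_inc_not_zero(&live));   /* 1 */
    printf("dying: %d\n", refcount_inc_not_zero(&dying));  /* 0 */
    return 0;
}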
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 1f6ae6379e0f..05f9515d2c05 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -59,22 +59,22 @@ XDP_METADATA_KFUNC_xxx
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
xdp_rx_meta, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
- xsk_features, NETDEV_A_DEV_PAD)) {
- genlmsg_cancel(rsp, hdr);
- return -EINVAL;
- }
+ xsk_features, NETDEV_A_DEV_PAD))
+ goto err_cancel_msg;
if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
- netdev->xdp_zc_max_segs)) {
- genlmsg_cancel(rsp, hdr);
- return -EINVAL;
- }
+ netdev->xdp_zc_max_segs))
+ goto err_cancel_msg;
}
genlmsg_end(rsp, hdr);
return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
}
static void
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fd20aae30be2..bbf40b999713 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (copy)
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy) {
copied = copied ? copied : -EFAULT;
goto out;
diff --git a/net/core/sock.c b/net/core/sock.c
index 8629f9aecf91..100e975073ca 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3742,6 +3742,9 @@ void sk_common_release(struct sock *sk)
sk->sk_prot->unhash(sk);
+ if (sk->sk_socket)
+ sk->sk_socket->sk = NULL;
+
/*
* In this point socket cannot receive new packets, but it is possible
* that some packets are in flight because some CPU runs receiver and
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41693154e426..022c12059cf2 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -295,10 +295,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
mutex_lock(&mem_id_lock);
ret = __mem_id_init_hash_table();
mutex_unlock(&mem_id_lock);
- if (ret < 0) {
- WARN_ON(1);
+ if (ret < 0)
return ERR_PTR(ret);
- }
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ff41bd6f99c3..5926159a6f20 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -657,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v4_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 85f4b8fdbe5e..da5dba120bc9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -400,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v6_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index e645d751a5e8..223dcd25d88a 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1306,7 +1306,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
rxfh.input_xfrm != RXH_XFRM_NO_CHANGE)
return -EINVAL;
- if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE &&
+ (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
!ops->cap_rss_sym_xor_supported)
return -EOPNOTSUPP;
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index b2de2108b356..34d76e87847d 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi(phydev);
mutex_unlock(&phydev->lock);
@@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi_max)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi_max(phydev);
mutex_unlock(&phydev->lock);
@@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
return ret;
};
+static bool linkstate_sqi_critical_error(int sqi)
+{
+ return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
+}
+
+static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
+{
+ return data->sqi >= 0 && data->sqi_max >= 0 &&
+ data->sqi <= data->sqi_max;
+}
+
static int linkstate_get_link_ext_state(struct net_device *dev,
struct linkstate_reply_data *data)
{
@@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
data->link = __ethtool_get_link(dev);
ret = linkstate_get_sqi(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ 0;
- if (data->sqi != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
-
- if (data->sqi_max != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
+ if (linkstate_sqi_valid(data)) {
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
+ }
if (data->link_ext_state_provided)
len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
@@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
return -EMSGSIZE;
- if (data->sqi != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
- return -EMSGSIZE;
+ if (linkstate_sqi_valid(data)) {
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+ return -EMSGSIZE;
- if (data->sqi_max != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
- return -EMSGSIZE;
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
+ data->sqi_max))
+ return -EMSGSIZE;
+ }
if (data->link_ext_state_provided) {
if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index dd6d46015058..e9cb27061c12 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1810,6 +1810,29 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
return CIPSO_V4_HDR_LEN + ret_val;
}
+static int cipso_v4_get_actual_opt_len(const unsigned char *data, int len)
+{
+ int iter = 0, optlen = 0;
+
+ /* determining the new total option length is tricky because of
+ * the padding necessary; the only thing I can think to do at
+ * this point is walk the options one-by-one, skipping the
+ * padding at the end to determine the actual option size and
+ * from there we can determine the new total option length
+ */
+ while (iter < len) {
+ if (data[iter] == IPOPT_END) {
+ break;
+ } else if (data[iter] == IPOPT_NOP) {
+ iter++;
+ } else {
+ iter += data[iter + 1];
+ optlen = iter;
+ }
+ }
+ return optlen;
+}
+
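A worked example of the walk above, runnable in userspace: NOP advances one byte, a real option advances by its length byte, and END stops the scan, so trailing padding is excluded from the returned length. The option bytes are invented for illustration:

#include <stdio.h>

#define IPOPT_END 0
#define IPOPT_NOP 1

static int actual_opt_len(const unsigned char *data, int len)
{
    int iter = 0, optlen = 0;

    while (iter < len) {
        if (data[iter] == IPOPT_END)
            break;
        if (data[iter] == IPOPT_NOP) {
            iter++;
        } else {
            iter += data[iter + 1];     /* length byte follows the type */
            optlen = iter;
        }
    }
    return optlen;
}

int main(void)
{
    /* NOP, a 7-byte option (type 0x83), then END padding out to 12 */
    unsigned char opts[12] = { 1, 0x83, 7, 4, 10, 0, 0, 1, 0, 0, 0, 0 };

    printf("actual length: %d of %zu\n",
           actual_opt_len(opts, sizeof(opts)), sizeof(opts));
    /* prints 8 of 12: the four END bytes of padding are excluded */
    return 0;
}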
/**
* cipso_v4_sock_setattr - Add a CIPSO option to a socket
* @sk: the socket
@@ -1986,7 +2009,6 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
u8 cipso_len;
u8 cipso_off;
unsigned char *cipso_ptr;
- int iter;
int optlen_new;
cipso_off = opt->opt.cipso - sizeof(struct iphdr);
@@ -2006,19 +2028,8 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
memmove(cipso_ptr, cipso_ptr + cipso_len,
opt->opt.optlen - cipso_off - cipso_len);
- /* determining the new total option length is tricky because of
- * the padding necessary, the only thing i can think to do at
- * this point is walk the options one-by-one, skipping the
- * padding at the end to determine the actual option size and
- * from there we can determine the new total option length */
- iter = 0;
- optlen_new = 0;
- while (iter < opt->opt.optlen)
- if (opt->opt.__data[iter] != IPOPT_NOP) {
- iter += opt->opt.__data[iter + 1];
- optlen_new = iter;
- } else
- iter++;
+ optlen_new = cipso_v4_get_actual_opt_len(opt->opt.__data,
+ opt->opt.optlen);
hdr_delta = opt->opt.optlen;
opt->opt.optlen = (optlen_new + 3) & ~3;
hdr_delta -= opt->opt.optlen;
@@ -2238,7 +2249,8 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
*/
int cipso_v4_skbuff_delattr(struct sk_buff *skb)
{
- int ret_val;
+ int ret_val, cipso_len, hdr_len_actual, new_hdr_len_actual, new_hdr_len,
+ hdr_len_delta;
struct iphdr *iph;
struct ip_options *opt = &IPCB(skb)->opt;
unsigned char *cipso_ptr;
@@ -2251,16 +2263,37 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb)
if (ret_val < 0)
return ret_val;
- /* the easiest thing to do is just replace the cipso option with noop
- * options since we don't change the size of the packet, although we
- * still need to recalculate the checksum */
-
iph = ip_hdr(skb);
cipso_ptr = (unsigned char *)iph + opt->cipso;
- memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+ cipso_len = cipso_ptr[1];
+
+ hdr_len_actual = sizeof(struct iphdr) +
+ cipso_v4_get_actual_opt_len((unsigned char *)(iph + 1),
+ opt->optlen);
+ new_hdr_len_actual = hdr_len_actual - cipso_len;
+ new_hdr_len = (new_hdr_len_actual + 3) & ~3;
+ hdr_len_delta = (iph->ihl << 2) - new_hdr_len;
+
+ /* 1. shift any options after CIPSO to the left */
+ memmove(cipso_ptr, cipso_ptr + cipso_len,
+ new_hdr_len_actual - opt->cipso);
+ /* 2. move the whole IP header to its new place */
+ memmove((unsigned char *)iph + hdr_len_delta, iph, new_hdr_len_actual);
+ /* 3. adjust the skb layout */
+ skb_pull(skb, hdr_len_delta);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ /* 4. re-fill new padding with IPOPT_END (may now be longer) */
+ memset((unsigned char *)iph + new_hdr_len_actual, IPOPT_END,
+ new_hdr_len - new_hdr_len_actual);
+
+ opt->optlen -= hdr_len_delta;
opt->cipso = 0;
opt->is_changed = 1;
-
+ if (hdr_len_delta != 0) {
+ iph->ihl = new_hdr_len >> 2;
+ iph_set_totlen(iph, skb->len);
+ }
ip_send_check(iph);
return 0;
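The length bookkeeping in the rewritten cipso_v4_skbuff_delattr() reduces to a few lines of arithmetic: subtract the CIPSO option, round the remaining header up to a 4-byte boundary with (x + 3) & ~3, and derive both the new ihl and how far the packet shrinks. A standalone check with sample numbers:

#include <stdio.h>

int main(void)
{
    int ihl_words = 10;           /* 40-byte header: 20 fixed + 20 opts */
    int hdr_len_actual = 20 + 14; /* 14 bytes of real options... */
    int cipso_len = 10;           /* ...10 of which are the CIPSO opt */

    int new_actual = hdr_len_actual - cipso_len;     /* 24 */
    int new_hdr_len = (new_actual + 3) & ~3;         /* 24, aligned */
    int hdr_delta = (ihl_words << 2) - new_hdr_len;  /* 40 - 24 = 16 */

    printf("new header %d bytes (ihl=%d), packet shrinks by %d\n",
           new_hdr_len, new_hdr_len >> 2, hdr_delta);
    return 0;
}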
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d81f74ce0f02..d4f0eff8b20f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1122,25 +1122,34 @@ drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
-static void reqsk_queue_hash_req(struct request_sock *req,
+static bool reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
+ bool found_dup_sk = false;
+
+ if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
+ return false;
+
+ /* The timer needs to be setup after a successful insertion. */
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+ return true;
}
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout)
{
- reqsk_queue_hash_req(req, timeout);
+ if (!reqsk_queue_hash_req(req, timeout))
+ return false;
+
inet_csk_reqsk_queue_added(sk);
+ return true;
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
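The reworked helper makes two ordering guarantees explicit: the timer is armed only after the hash insertion succeeds, and the refcount is published last (after the smp_wmb()) so concurrent lookups never see a half-initialized request. A rough userspace model of that publish order, with invented types and a release store standing in for the barrier:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct req {
    int timeout;        /* fully set before publication */
    atomic_int refcnt;  /* 0 means "not yet visible" */
};

static bool hash_insert(struct req *r)  /* may fail on a duplicate */
{
    (void)r;
    return true;
}

static bool queue_hash_add(struct req *r, int timeout)
{
    if (!hash_insert(r))
        return false;       /* caller frees, like reqsk_free() */
    r->timeout = timeout;   /* "arm the timer" only after insertion */
    atomic_store_explicit(&r->refcnt, 3, memory_order_release);
    return true;
}

int main(void)
{
    struct req r = { .refcnt = 0 };

    printf("added: %d, refcnt: %d\n", queue_hash_add(&r, 100),
           atomic_load(&r.refcnt));
    return 0;
}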
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7adace541fe2..9712cdb8087c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1383,6 +1383,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
req.sdiag_family = AF_UNSPEC; /* compatibility */
req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
@@ -1398,6 +1399,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.sdiag_family = rc->idiag_family;
req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 37c42b63ff99..09c0fa6756b7 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -1968,8 +1968,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
first = true;
}
- if (cmd.ao_required && tcp_ao_required_verify(sk))
- return -EKEYREJECTED;
+ if (cmd.ao_required && tcp_ao_required_verify(sk)) {
+ err = -EKEYREJECTED;
+ goto out;
+ }
/* For sockets in TCP_CLOSED it's possible set keys that aren't
* matching the future peer (address/port/VRF/etc),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c04a9c8be9d..38da23f991d6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2129,8 +2129,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
static inline void tcp_init_undo(struct tcp_sock *tp)
{
tp->undo_marker = tp->snd_una;
+
/* Retransmission still in flight may cause DSACKs later. */
- tp->undo_retrans = tp->retrans_out ? : -1;
+ /* First, account for regular retransmits in flight: */
+ tp->undo_retrans = tp->retrans_out;
+ /* Next, account for TLP retransmits in flight: */
+ if (tp->tlp_high_seq && tp->tlp_retrans)
+ tp->undo_retrans++;
+ /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
+ if (!tp->undo_retrans)
+ tp->undo_retrans = -1;
}
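The new tcp_init_undo() accounting is easy to sanity-check in isolation: count regular retransmits, add one for an outstanding TLP retransmission, and map an exact zero to -1 because zero means "can undo now". For example:

#include <stdio.h>

static int init_undo_retrans(int retrans_out, int tlp_high_seq,
                             int tlp_retrans)
{
    int undo_retrans = retrans_out;

    if (tlp_high_seq && tlp_retrans)
        undo_retrans++;         /* TLP retransmit still in flight */
    if (!undo_retrans)
        undo_retrans = -1;      /* 0 would mean "can undo now" */
    return undo_retrans;
}

int main(void)
{
    printf("%d\n", init_undo_retrans(2, 0, 0));  /* 2 */
    printf("%d\n", init_undo_retrans(0, 1, 1));  /* 1: TLP counted */
    printf("%d\n", init_undo_retrans(0, 0, 0));  /* -1: nothing out */
    return 0;
}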
static bool tcp_is_rack(const struct sock *sk)
@@ -2209,6 +2217,7 @@ void tcp_enter_loss(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
@@ -2782,13 +2791,37 @@ static void tcp_mtup_probe_success(struct sock *sk)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
+/* Sometimes we deduce that packets have been dropped due to reasons other than
+ * congestion, like path MTU reductions or failed client TFO attempts. In these
+ * cases we call this function to retransmit as many packets as cwnd allows,
+ * without reducing cwnd. Given that retransmits will set retrans_stamp to a
+ * non-zero value (and may do so in a later calling context due to TSQ), we
+ * also enter CA_Loss so that we track when all retransmitted packets are ACKed
+ * and clear retrans_stamp when that happens (to ensure later recurring RTOs
+ * are using the correct retrans_stamp and don't declare ETIMEDOUT
+ * prematurely).
+ */
+static void tcp_non_congestion_loss_retransmit(struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (icsk->icsk_ca_state != TCP_CA_Loss) {
+ tp->high_seq = tp->snd_nxt;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_ssthresh = 0;
+ tp->undo_marker = 0;
+ tcp_set_ca_state(sk, TCP_CA_Loss);
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* Do a simple retransmit without using the backoff mechanisms in
* tcp_timer. This is used for path mtu discovery.
* The socket is already locked here.
*/
void tcp_simple_retransmit(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int mss;
@@ -2828,14 +2861,7 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective
* cwnd/ssthresh really reduced now.
*/
- if (icsk->icsk_ca_state != TCP_CA_Loss) {
- tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_ssthresh = 0;
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Loss);
- }
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
@@ -3060,7 +3086,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
return;
if (tcp_try_undo_dsack(sk))
- tcp_try_keep_open(sk);
+ tcp_try_to_open(sk, flag);
tcp_identify_packet_loss(sk, ack_flag);
if (icsk->icsk_ca_state != TCP_CA_Recovery) {
@@ -4207,6 +4233,13 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
+#ifdef CONFIG_TCP_AO
+ case TCPOPT_AO:
+ /* TCP AO has already been checked
+ * (see tcp_inbound_ao_hash()).
+ */
+ break;
+#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@@ -6295,7 +6328,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data)
tcp_mark_skb_lost(sk, data);
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
@@ -7256,7 +7289,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie) {
req->timeout = tcp_timeout_init((struct sock *)req);
- inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
+ req->timeout))) {
+ reqsk_free(req);
+ return 0;
+ }
+
}
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index e93df98de3f4..b01eb6d94413 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -619,6 +619,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] =
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
[TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
.len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5bfd76a31af6..892c86657fbc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -483,15 +483,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
const struct sk_buff *skb,
u32 rtx_delta)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
const struct tcp_sock *tp = tcp_sk(sk);
- const int timeout = TCP_RTO_MAX * 2;
+ int timeout = TCP_RTO_MAX * 2;
s32 rcv_delta;
+ if (user_timeout) {
+ /* If user application specified a TCP_USER_TIMEOUT,
+ * it does not want win 0 packets to 'reset the timer'
+ * while retransmits are not making progress.
+ */
+ if (rtx_delta > user_timeout)
+ return true;
+ timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
+ }
/* Note: timer interrupt might have been delayed by at least one jiffy,
* and tp->rcv_tstamp might very well have been written recently.
* rcv_delta can thus be negative.
*/
- rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
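With TCP_USER_TIMEOUT set, the probe-0 path above both fails fast when retransmissions have stalled longer than the user timeout and caps the zero-window deadline at that value rather than 2 * TCP_RTO_MAX. A userspace rendering of the decision, in milliseconds and with TCP_RTO_MAX assumed to be 120 s:

#include <stdbool.h>
#include <stdio.h>

#define TCP_RTO_MAX_MS 120000

static bool rtx_probe0_timed_out(unsigned int user_timeout_ms,
                                 unsigned int rtx_delta_ms,
                                 int rcv_delta_ms)
{
    int timeout = TCP_RTO_MAX_MS * 2;

    if (user_timeout_ms) {
        if (rtx_delta_ms > user_timeout_ms)
            return true;        /* retransmits made no progress */
        if (timeout > (int)user_timeout_ms)
            timeout = user_timeout_ms;  /* cap the probe deadline */
    }
    return rcv_delta_ms > timeout;
}

int main(void)
{
    /* user timeout 30 s, retransmits stalled 31 s: timed out even
     * though zero-window probes kept rcv_delta small */
    printf("%d\n", rtx_probe0_timed_out(30000, 31000, 1000));  /* 1 */
    printf("%d\n", rtx_probe0_timed_out(0, 31000, 1000));      /* 0 */
    return 0;
}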
@@ -536,8 +547,6 @@ void tcp_retransmit_timer(struct sock *sk)
if (WARN_ON_ONCE(!skb))
return;
- tp->tlp_high_seq = 0;
-
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 189c9113fe9a..578668878a85 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -326,6 +326,8 @@ found:
goto fail_unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -342,7 +344,7 @@ found:
hslot2->count++;
spin_unlock(&hslot2->lock);
}
- sock_set_flag(sk, SOCK_RCU_FREE);
+
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6e57c03e3255..83e4f9855ae1 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2514,7 +2514,8 @@ int __init fib6_init(void)
goto out_kmem_cache_create;
ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED);
+ inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
+ RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 952c2bf11709..8d72ca0b086d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -638,6 +638,8 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
rcu_read_lock();
last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
+ if (!idev)
+ goto out;
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
if (READ_ONCE(neigh->nud_state) & NUD_VALID)
@@ -3603,7 +3605,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
if (!dev)
goto out;
- if (idev->cnf.disable_ipv6) {
+ if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
err = -EACCES;
goto out;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 24e2b4b494cb..c434940131b1 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx6_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx6_finish);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx4_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cc885d3aa9e5..2f1ea5f999a2 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -56,12 +56,18 @@ static int xfrm6_get_saddr(struct net *net, int oif,
{
struct dst_entry *dst;
struct net_device *dev;
+ struct inet6_dev *idev;
dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
- dev = ip6_dst_idev(dst)->dev;
+ idev = ip6_dst_idev(dst);
+ if (!idev) {
+ dst_release(dst);
+ return -EHOSTUNREACH;
+ }
+ dev = idev->dev;
ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
dst_release(dst);
return 0;
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index dce37ba8ebe3..254d745832cb 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -311,6 +311,18 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
+ /*
+ * We should perhaps push the chanctx emulation down and only
+ * make it call ->config() when the chanctx is actually
+ * assigned here (and unassigned below), but that's yet
+ * another change to all drivers to add assign/unassign
+ * emulation callbacks. Maybe later.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ local->emulate_chanctx &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return 0;
+
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -338,6 +350,11 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ local->emulate_chanctx &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return;
+
if (!check_sdata_in_driver(sdata))
return;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index dc42902e2693..b935bb5d8ed1 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -686,6 +686,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_del_virtual_monitor(local);
ieee80211_recalc_idle(local);
+ ieee80211_recalc_offload(local);
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
break;
@@ -1121,9 +1122,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
int ret;
- if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
- return 0;
-
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1145,11 +1143,13 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
ieee80211_set_default_queues(sdata);
- ret = drv_add_interface(local, sdata);
- if (WARN_ON(ret)) {
- /* ok .. stupid driver, it asked for this! */
- kfree(sdata);
- return ret;
+ if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
+ ret = drv_add_interface(local, sdata);
+ if (WARN_ON(ret)) {
+ /* ok .. stupid driver, it asked for this! */
+ kfree(sdata);
+ return ret;
+ }
}
set_bit(SDATA_STATE_RUNNING, &sdata->state);
@@ -1187,9 +1187,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
- return;
-
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1209,7 +1206,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
ieee80211_link_release_channel(&sdata->deflink);
- drv_remove_interface(local, sdata);
+ if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ drv_remove_interface(local, sdata);
kfree(sdata);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1132dea0e290..0965ad11ec74 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -423,6 +423,7 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_ERP_SLOT;
}
+/* context: requires softirqs disabled */
void ieee80211_handle_queued_frames(struct ieee80211_local *local)
{
struct sk_buff *skb;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 8ecc4b710b0e..b5f2df61c7f6 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -358,7 +358,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
struct cfg80211_scan_request *req;
struct cfg80211_chan_def chandef;
u8 bands_used = 0;
- int i, ielen, n_chans;
+ int i, ielen;
+ u32 *n_chans;
u32 flags = 0;
req = rcu_dereference_protected(local->scan_req,
@@ -368,34 +369,34 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
return false;
if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
+ local->hw_scan_req->req.n_channels = req->n_channels;
+
for (i = 0; i < req->n_channels; i++) {
local->hw_scan_req->req.channels[i] = req->channels[i];
bands_used |= BIT(req->channels[i]->band);
}
-
- n_chans = req->n_channels;
} else {
do {
if (local->hw_scan_band == NUM_NL80211_BANDS)
return false;
- n_chans = 0;
+ n_chans = &local->hw_scan_req->req.n_channels;
+ *n_chans = 0;
for (i = 0; i < req->n_channels; i++) {
if (req->channels[i]->band !=
local->hw_scan_band)
continue;
- local->hw_scan_req->req.channels[n_chans] =
+ local->hw_scan_req->req.channels[(*n_chans)++] =
req->channels[i];
- n_chans++;
+
bands_used |= BIT(req->channels[i]->band);
}
local->hw_scan_band++;
- } while (!n_chans);
+ } while (!*n_chans);
}
- local->hw_scan_req->req.n_channels = n_chans;
ieee80211_prepare_scan_chandef(&chandef);
if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 283bfc99417e..771c05640aa3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1567,7 +1567,9 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
void ieee80211_stop_device(struct ieee80211_local *local)
{
+ local_bh_disable();
ieee80211_handle_queued_frames(local);
+ local_bh_enable();
ieee80211_led_radio(local, false);
ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
@@ -1843,7 +1845,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* add interfaces */
sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
/* in HW restart it exists already */
WARN_ON(local->resuming);
res = drv_add_interface(local, sdata);
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 9ab7396668d2..21b7c3b280b4 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -161,8 +161,10 @@ void ieee802154_configure_durations(struct wpan_phy *phy,
}
phy->symbol_duration = duration;
- phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
- phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
+ phy->lifs_period =
+ (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
+ phy->sifs_period =
+ (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
}
EXPORT_SYMBOL(ieee802154_configure_durations);
@@ -184,10 +186,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
* Should be done when all drivers sets this value.
*/
- wpan_phy->lifs_period =
- (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
- wpan_phy->sifs_period =
- (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
+ wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
+ wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
}
int ieee802154_register_hw(struct ieee802154_hw *hw)
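The fix here is purely a units bug: lifs_period and sifs_period are kept in microseconds, so the nanosecond product must be divided by NSEC_PER_USEC rather than NSEC_PER_SEC, which truncated both to zero. Worked through for 2.4 GHz O-QPSK, assuming the usual 802.15.4 values of a 16 us symbol, LIFS = 40 symbols and SIFS = 12 symbols:

#include <stdio.h>

#define NSEC_PER_USEC 1000u
#define IEEE802154_LIFS_PERIOD 40u  /* symbols */
#define IEEE802154_SIFS_PERIOD 12u  /* symbols */

int main(void)
{
    unsigned int symbol_duration_ns = 16000;  /* 16 us symbol */

    printf("lifs = %u us\n",
           IEEE802154_LIFS_PERIOD * symbol_duration_ns / NSEC_PER_USEC);
    printf("sifs = %u us\n",
           IEEE802154_SIFS_PERIOD * symbol_duration_ns / NSEC_PER_USEC);
    /* 640 us and 192 us; dividing by NSEC_PER_SEC gives 0 for both */
    return 0;
}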
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 2a6f1ed763c9..6fbed5bb5c3e 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -34,8 +34,8 @@ void ieee802154_xmit_sync_worker(struct work_struct *work)
if (res)
goto err_tx;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, skb->len);
ieee802154_xmit_complete(&local->hw, skb, false);
@@ -90,8 +90,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
if (ret)
goto err_wake_netif_queue;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, len);
} else {
local->tx_skb = skb;
queue_work(local->workqueue, &local->sync_tx_work);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 3126911f5042..b00fc285b334 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -815,12 +815,21 @@ int __init netfilter_init(void)
if (ret < 0)
goto err;
+#ifdef CONFIG_LWTUNNEL
+ ret = netfilter_lwtunnel_init();
+ if (ret < 0)
+ goto err_lwtunnel_pernet;
+#endif
ret = netfilter_log_init();
if (ret < 0)
- goto err_pernet;
+ goto err_log_pernet;
return 0;
-err_pernet:
+err_log_pernet:
+#ifdef CONFIG_LWTUNNEL
+ netfilter_lwtunnel_fini();
+err_lwtunnel_pernet:
+#endif
unregister_pernet_subsys(&netfilter_net_ops);
err:
return ret;
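The netfilter_init() change extends a standard goto unwind ladder: each successful step gains a label just below its own cleanup, so a failure at step N tears down exactly steps N-1..1 in reverse. The shape of the pattern, reduced to a compilable toy:

#include <stdio.h>

static int step_a(void) { return 0; }
static int step_b(void) { return 0; }
static int step_c(void) { return -1; }  /* pretend this one fails */
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

static int init_all(void)
{
    int ret;

    ret = step_a();
    if (ret < 0)
        goto err;
    ret = step_b();
    if (ret < 0)
        goto err_undo_a;
    ret = step_c();
    if (ret < 0)
        goto err_undo_b;
    return 0;

err_undo_b:
    undo_b();       /* falls through: cleanups run in reverse order */
err_undo_a:
    undo_a();
err:
    return ret;
}

int main(void)
{
    printf("init_all: %d\n", init_all());  /* undo b, undo a, -1 */
    return 0;
}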
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c7ae4d9bf3d2..61431690cbd5 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex or ip_set_ref_lock is held: */
-#define ip_set_dereference(p) \
- rcu_dereference_protected(p, \
+#define ip_set_dereference(inst) \
+ rcu_dereference_protected((inst)->ip_set_list, \
lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
- lockdep_is_held(&ip_set_ref_lock))
+ lockdep_is_held(&ip_set_ref_lock) || \
+ (inst)->is_deleted)
#define ip_set(inst, id) \
- ip_set_dereference((inst)->ip_set_list)[id]
+ ip_set_dereference(inst)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
#define ip_set_dereference_nfnl(p) \
@@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = ip_set_dereference(inst->ip_set_list);
+ tmp = ip_set_dereference(inst);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 74112e9c5dab..6c40bdf8b05a 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -22,9 +22,6 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
-#ifdef CONFIG_LWTUNNEL
-#include <net/netfilter/nf_hooks_lwtunnel.h>
-#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -612,9 +609,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
-#ifdef CONFIG_LWTUNNEL
- NF_SYSCTL_CT_LWTUNNEL,
-#endif
NF_SYSCTL_CT_LAST_SYSCTL,
};
@@ -946,15 +940,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
-#ifdef CONFIG_LWTUNNEL
- [NF_SYSCTL_CT_LWTUNNEL] = {
- .procname = "nf_hooks_lwtunnel",
- .data = NULL,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
- },
-#endif
};
static struct ctl_table nf_ct_netfilter_table[] = {
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
index 00e89ffd78f6..d8ebebc9775d 100644
--- a/net/netfilter/nf_hooks_lwtunnel.c
+++ b/net/netfilter/nf_hooks_lwtunnel.c
@@ -3,6 +3,9 @@
#include <linux/sysctl.h>
#include <net/lwtunnel.h>
#include <net/netfilter/nf_hooks_lwtunnel.h>
+#include <linux/netfilter.h>
+
+#include "nf_internals.h"
static inline int nf_hooks_lwtunnel_get(void)
{
@@ -50,4 +53,71 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
return ret;
}
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+
+static struct ctl_table nf_lwtunnel_sysctl_table[] = {
+ {
+ .procname = "nf_hooks_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
+ },
+};
+
+static int __net_init nf_lwtunnel_net_init(struct net *net)
+{
+ struct ctl_table_header *hdr;
+ struct ctl_table *table;
+
+ table = nf_lwtunnel_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(nf_lwtunnel_sysctl_table,
+ sizeof(nf_lwtunnel_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+ }
+
+ hdr = register_net_sysctl_sz(net, "net/netfilter", table,
+ ARRAY_SIZE(nf_lwtunnel_sysctl_table));
+ if (!hdr)
+ goto err_reg;
+
+ net->nf.nf_lwtnl_dir_header = hdr;
+
+ return 0;
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit nf_lwtunnel_net_exit(struct net *net)
+{
+ const struct ctl_table *table;
+
+ table = net->nf.nf_lwtnl_dir_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct pernet_operations nf_lwtunnel_net_ops = {
+ .init = nf_lwtunnel_net_init,
+ .exit = nf_lwtunnel_net_exit,
+};
+
+int __init netfilter_lwtunnel_init(void)
+{
+ return register_pernet_subsys(&nf_lwtunnel_net_ops);
+}
+
+void netfilter_lwtunnel_fini(void)
+{
+ unregister_pernet_subsys(&nf_lwtunnel_net_ops);
+}
+#else
+int __init netfilter_lwtunnel_init(void) { return 0; }
+void netfilter_lwtunnel_fini(void) {}
#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 832ae64179f0..25403023060b 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net);
/* nf_log.c */
int __init netfilter_log_init(void);
+#ifdef CONFIG_LWTUNNEL
+/* nf_hooks_lwtunnel.c */
+int __init netfilter_lwtunnel_init(void);
+void netfilter_lwtunnel_fini(void);
+#endif
+
/* core.c */
void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
const struct nf_hook_ops *reg);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index be3b4c90d2ed..91cc3a81ba8f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3823,6 +3823,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+/**
+ * nft_chain_validate - loop detection and hook validation
+ *
+ * @ctx: context containing call depth and base chain
+ * @chain: chain to validate
+ *
+ * Walk through the rules of the given chain and chase all jumps/gotos
+ * and set lookups until either the jump limit is hit or all reachable
+ * chains have been validated.
+ */
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
@@ -3844,6 +3853,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (!expr->ops->validate)
continue;
+ /* This may call nft_chain_validate() recursively;
+ * callers that do so must increment ctx->level.
+ */
err = expr->ops->validate(ctx, expr, &data);
if (err < 0)
return err;
@@ -5740,8 +5752,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
- set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
- set->dlen) < 0)
+ nft_set_datatype(set), set->dlen) < 0)
goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) &&
@@ -10810,150 +10821,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/*
- * Loop detection - walk through the ruleset beginning at the destination chain
- * of a new jump until either the source chain is reached (loop) or all
- * reachable chains have been traversed.
- *
- * The loop check is performed whenever a new jump verdict is added to an
- * expression or verdict map or a verdict map is bound to a new chain.
- */
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain);
-
-static int nft_check_loops(const struct nft_ctx *ctx,
- const struct nft_set_ext *ext)
-{
- const struct nft_data *data;
- int ret;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- ret = nf_tables_check_loops(ctx, data->verdict.chain);
- break;
- default:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_elem_priv *elem_priv)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
-
- if (!nft_set_elem_active(ext, iter->genmask))
- return 0;
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- return nft_check_loops(ctx, ext);
-}
-
-static int nft_set_catchall_loops(const struct nft_ctx *ctx,
- struct nft_set *set)
-{
- u8 genmask = nft_genmask_next(ctx->net);
- struct nft_set_elem_catchall *catchall;
- struct nft_set_ext *ext;
- int ret = 0;
-
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
- continue;
-
- ret = nft_check_loops(ctx, ext);
- if (ret < 0)
- return ret;
- }
-
- return ret;
-}
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain)
-{
- const struct nft_rule *rule;
- const struct nft_expr *expr, *last;
- struct nft_set *set;
- struct nft_set_binding *binding;
- struct nft_set_iter iter;
-
- if (ctx->chain == chain)
- return -ELOOP;
-
- if (fatal_signal_pending(current))
- return -EINTR;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_rule_for_each_expr(expr, last, rule) {
- struct nft_immediate_expr *priv;
- const struct nft_data *data;
- int err;
-
- if (strcmp(expr->ops->type->name, "immediate"))
- continue;
-
- priv = nft_expr_priv(expr);
- if (priv->dreg != NFT_REG_VERDICT)
- continue;
-
- data = &priv->data;
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- err = nf_tables_check_loops(ctx,
- data->verdict.chain);
- if (err < 0)
- return err;
- break;
- default:
- break;
- }
- }
- }
-
- list_for_each_entry(set, &ctx->table->sets, list) {
- if (!nft_is_active_next(ctx->net, set))
- continue;
- if (!(set->flags & NFT_SET_MAP) ||
- set->dtype != NFT_DATA_VERDICT)
- continue;
-
- list_for_each_entry(binding, &set->bindings, list) {
- if (!(binding->flags & NFT_SET_MAP) ||
- binding->chain != chain)
- continue;
-
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_loop_check_setelem;
-
- set->ops->walk(ctx, set, &iter);
- if (!iter.err)
- iter.err = nft_set_catchall_loops(ctx, set);
-
- if (iter.err < 0)
- return iter.err;
- }
- }
-
- return 0;
-}
-
/**
* nft_parse_u32_check - fetch u32 attribute and check for maximum value
*
@@ -11066,13 +10933,16 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
- err = nf_tables_check_loops(ctx, data->verdict.chain);
+ err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
return 0;
default:
+ if (type != NFT_DATA_VALUE)
+ return -EINVAL;
+
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
if (len == 0)
@@ -11081,8 +10951,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
sizeof_field(struct nft_regs, data))
return -ERANGE;
- if (data != NULL && type != NFT_DATA_VALUE)
- return -EINVAL;
return 0;
}
}
@@ -11483,8 +11351,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nf_tables_destroy_list))
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f1c31757e496..55e28e1da66e 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
hooks = nf_hook_entries_head(net, pf, entry->state.hook);
i = entry->hook_index;
- if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ if (!hooks || i >= hooks->num_hook_entries) {
kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
nf_queue_entry_free(entry);
return;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b314ca728a29..f3080fa1b226 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -132,7 +132,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
- &priv->dreg, NULL, set->dtype,
+ &priv->dreg, NULL,
+ nft_set_datatype(set),
set->dlen);
if (err < 0)
return err;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 4e7c968cde2d..5e3ca068f04e 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,7 +121,8 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
- sock_hold(sk);
+ if (sk->sk_state == TCP_LISTEN)
+ sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
goto out;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 2928c142a2dd..3b980bf2770b 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels)
{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+ struct nf_conn_labels *cl = NULL;
+ if (ct) {
+ if (ct->master && !nf_ct_is_confirmed(ct))
+ ct = ct->master;
+ cl = nf_ct_labels_find(ct);
+ }
if (cl)
memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9ee622fb1160..2520708b06a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
u32 max;
if (*index) {
-again:
rcu_read_lock();
p = idr_find(&idrinfo->action_idr, *index);
@@ -839,7 +838,7 @@ again:
* index but did not assign the pointer yet.
*/
rcu_read_unlock();
- goto again;
+ return -EAGAIN;
}
if (!p) {
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index baac083fd8f1..6fa3cca87d34 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -41,21 +41,26 @@ static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);
+struct zones_ht_key {
+ struct net *net;
+ u16 zone;
+};
+
struct tcf_ct_flow_table {
struct rhash_head node; /* In zones tables */
struct rcu_work rwork;
struct nf_flowtable nf_ft;
refcount_t ref;
- u16 zone;
+ struct zones_ht_key key;
bool dying;
};
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
- .key_offset = offsetof(struct tcf_ct_flow_table, zone),
- .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
+ .key_offset = offsetof(struct tcf_ct_flow_table, key),
+ .key_len = sizeof_field(struct tcf_ct_flow_table, key),
.automatic_shrinking = true,
};
@@ -316,11 +321,12 @@ static struct nf_flowtable_type flowtable_ct = {
static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
{
+ struct zones_ht_key key = { .net = net, .zone = params->zone };
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
mutex_lock(&zones_mutex);
- ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
+ ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params);
if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
goto out_unlock;
@@ -329,7 +335,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
goto err_alloc;
refcount_set(&ct_ft->ref, 1);
- ct_ft->zone = params->zone;
+ ct_ft->key = key;
err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
if (err)
goto err_insert;
@@ -1071,6 +1077,14 @@ do_nat:
*/
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
goto drop;
+
+ /* The ct may be dropped if a clash has been resolved,
+ * so it's necessary to retrieve it from skb again to
+ * prevent UAF.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ skip_add = true;
}
if (!skip_add)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index c2ef9dcf91d2..cc6051d4f2ef 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->block, sch, &q->block_info);
if (entry) {
- tcx_miniq_set_active(entry, false);
+ tcx_miniq_dec(entry);
if (!tcx_entry_is_active(entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(entry);
@@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, false, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, false);
@@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
if (ingress_entry) {
- tcx_miniq_set_active(ingress_entry, false);
+ tcx_miniq_dec(ingress_entry);
if (!tcx_entry_is_active(ingress_entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(ingress_entry);
@@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch)
}
if (egress_entry) {
- tcx_miniq_set_active(egress_entry, false);
+ tcx_miniq_dec(egress_entry);
if (!tcx_entry_is_active(egress_entry)) {
tcx_entry_update(dev, NULL, false);
tcx_entry_free(egress_entry);
diff --git a/net/socket.c b/net/socket.c
index e416920e9399..fcbdd5bc47ac 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1822,6 +1822,20 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
return __sys_socketpair(family, type, protocol, usockvec);
}
+int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
+ int addrlen)
+{
+ int err;
+
+ err = security_socket_bind(sock, (struct sockaddr *)address,
+ addrlen);
+ if (!err)
+ err = READ_ONCE(sock->ops)->bind(sock,
+ (struct sockaddr *)address,
+ addrlen);
+ return err;
+}
+
/*
* Bind a name to a socket. Nothing much to do here since it's
* the protocol's responsibility to handle the local address.
@@ -1839,15 +1853,8 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
err = move_addr_to_kernel(umyaddr, addrlen, &address);
- if (!err) {
- err = security_socket_bind(sock,
- (struct sockaddr *)&address,
- addrlen);
- if (!err)
- err = READ_ONCE(sock->ops)->bind(sock,
- (struct sockaddr *)
- &address, addrlen);
- }
+ if (!err)
+ err = __sys_bind_socket(sock, &address, addrlen);
fput_light(sock->file, fput_needed);
}
return err;
@@ -1863,23 +1870,28 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
* necessary for a listen, and if that works, we mark the socket as
* ready for listening.
*/
+int __sys_listen_socket(struct socket *sock, int backlog)
+{
+ int somaxconn, err;
+
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
+ if ((unsigned int)backlog > somaxconn)
+ backlog = somaxconn;
+
+ err = security_socket_listen(sock, backlog);
+ if (!err)
+ err = READ_ONCE(sock->ops)->listen(sock, backlog);
+ return err;
+}
int __sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
- int somaxconn;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
- if ((unsigned int)backlog > somaxconn)
- backlog = somaxconn;
-
- err = security_socket_listen(sock, backlog);
- if (!err)
- err = READ_ONCE(sock->ops)->listen(sock, backlog);
-
+ err = __sys_listen_socket(sock, backlog);
fput_light(sock->file, fput_needed);
}
return err;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2b4b1276d4e8..d9cda1e53a01 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1557,9 +1557,11 @@ out_drop:
*/
void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
+ struct rpc_timeout timeout = {
+ .to_increment = 0,
+ };
struct rpc_task *task;
int proc_error;
- struct rpc_timeout timeout;
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
@@ -1612,6 +1614,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
timeout.to_initval = req->rq_xprt->timeout->to_initval;
timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
+ timeout.to_maxval = timeout.to_initval;
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index dfc353eea8ed..0e1691316f42 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2441,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
transport->srcport = 0;
status = -EAGAIN;
break;
+ case -EPERM:
+ /* Happens, for instance, if a BPF program is preventing
+ * the connect. Remap the error so upper layers can better
+ * deal with it.
+ */
+ status = -ECONNREFUSED;
+ fallthrough;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c1e890a82434..500320e5ca47 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2105,6 +2105,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
} else {
n = tipc_node_find_by_id(net, ehdr->id);
}
+ skb_dst_force(skb);
tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
if (!skb)
return;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5e695a9a609c..142f56770b77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2613,10 +2613,24 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
{
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
- skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
- skb = NULL;
+ if (!unix_skb_len(skb)) {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
+ if (copied && (!u->oob_skb || skb == u->oob_skb)) {
+ skb = NULL;
+ } else if (flags & MSG_PEEK) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ } else {
+ unlinked_skb = skb;
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ }
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(unlinked_skb);
} else {
struct sk_buff *unlinked_skb = NULL;
@@ -3093,12 +3107,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
case SIOCATMARK:
{
+ struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int answ = 0;
+ mutex_lock(&u->iolock);
+
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
- answ = 1;
+ if (skb) {
+ struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+
+ if (skb == oob_skb ||
+ (!oob_skb && !unix_skb_len(skb)))
+ answ = 1;
+ }
+
+ mutex_unlock(&u->iolock);
+
err = put_user(answ, (int __user *)arg);
}
break;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index dfe94a90ece4..23efb78fe9ef 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -476,6 +476,7 @@ prev_vertex:
}
if (vertex->index == vertex->scc_index) {
+ struct unix_vertex *v;
struct list_head scc;
bool scc_dead = true;
@@ -486,15 +487,15 @@ prev_vertex:
*/
__list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
- list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+ list_for_each_entry_reverse(v, &scc, scc_entry) {
/* Don't restart DFS from this vertex in unix_walk_scc(). */
- list_move_tail(&vertex->entry, &unix_visited_vertices);
+ list_move_tail(&v->entry, &unix_visited_vertices);
/* Mark vertex as off-stack. */
- vertex->index = unix_vertex_grouped_index;
+ v->index = unix_vertex_grouped_index;
if (scc_dead)
- scc_dead = unix_vertex_dead(vertex);
+ scc_dead = unix_vertex_dead(v);
}
if (scc_dead)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3c0bca4238d3..72c7bf558581 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -468,6 +468,10 @@ static const struct netlink_range_validation nl80211_punct_bitmap_range = {
.max = 0xffff,
};
+static const struct netlink_range_validation q_range = {
+ .max = INT_MAX,
+};
+
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -754,7 +758,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
- [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+ [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range),
[NL80211_ATTR_HE_CAPABILITY] =
NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa,
NL80211_HE_MAX_CAPABILITY_LEN),
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 2f2a3163968a..0222ede0feb6 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -3416,10 +3416,14 @@ int cfg80211_wext_siwscan(struct net_device *dev,
wiphy = &rdev->wiphy;
/* Determine number of channels, needed to allocate creq */
- if (wreq && wreq->num_channels)
+ if (wreq && wreq->num_channels) {
+ /* Passed from userspace so should be checked */
+ if (unlikely(wreq->num_channels > IW_MAX_FREQUENCIES))
+ return -EINVAL;
n_channels = wreq->num_channels;
- else
+ } else {
n_channels = ieee80211_get_num_supported_channels(wiphy);
+ }
creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
n_channels * sizeof(void *),
@@ -3493,8 +3497,10 @@ int cfg80211_wext_siwscan(struct net_device *dev,
memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
creq->ssids[0].ssid_len = wreq->essid_len;
}
- if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE)
+ if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) {
+ creq->ssids = NULL;
creq->n_ssids = 0;
+ }
}
for (i = 0; i < NUM_NL80211_BANDS; i++)
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index ddb5644d4fd9..6deee85a29c8 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -7,6 +7,9 @@
*/
#include <kunit/test.h>
+#include <linux/blk_types.h>
+#include <linux/blk-mq.h>
+#include <linux/blkdev.h>
#include <linux/errname.h>
#include <linux/ethtool.h>
#include <linux/jiffies.h>
@@ -20,8 +23,10 @@
/* `bindgen` gets confused at certain things. */
const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
+const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE;
const gfp_t RUST_CONST_HELPER_GFP_ATOMIC = GFP_ATOMIC;
const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL;
const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT;
const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT;
const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO;
+const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL;
diff --git a/rust/helpers.c b/rust/helpers.c
index 2c37a0f5d7a8..3df5217fb2ff 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -186,3 +186,19 @@ static_assert(
__alignof__(size_t) == __alignof__(uintptr_t),
"Rust code expects C `size_t` to match Rust `usize`"
);
+
+// This will soon be moved to a separate file, so no need to merge with above.
+#include <linux/blk-mq.h>
+#include <linux/blkdev.h>
+
+void *rust_helper_blk_mq_rq_to_pdu(struct request *rq)
+{
+ return blk_mq_rq_to_pdu(rq);
+}
+EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_to_pdu);
+
+struct request *rust_helper_blk_mq_rq_from_pdu(void *pdu)
+{
+ return blk_mq_rq_from_pdu(pdu);
+}
+EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_from_pdu);
diff --git a/rust/kernel/alloc/vec_ext.rs b/rust/kernel/alloc/vec_ext.rs
index e9a81052728a..1297a4be32e8 100644
--- a/rust/kernel/alloc/vec_ext.rs
+++ b/rust/kernel/alloc/vec_ext.rs
@@ -4,7 +4,6 @@
use super::{AllocError, Flags};
use alloc::vec::Vec;
-use core::ptr;
/// Extensions to [`Vec`].
pub trait VecExt<T>: Sized {
@@ -141,7 +140,11 @@ impl<T> VecExt<T> for Vec<T> {
// `krealloc_aligned`. A `Vec<T>`'s `ptr` value is not guaranteed to be NULL and might be
// dangling after being created with `Vec::new`. Instead, we can rely on `Vec<T>`'s capacity
// to be zero if no memory has been allocated yet.
- let ptr = if cap == 0 { ptr::null_mut() } else { old_ptr };
+ let ptr = if cap == 0 {
+ core::ptr::null_mut()
+ } else {
+ old_ptr
+ };
// SAFETY: `ptr` is valid because it's either NULL or comes from a previous call to
// `krealloc_aligned`. We also verified that the type is not a ZST.
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
new file mode 100644
index 000000000000..150f710efe5b
--- /dev/null
+++ b/rust/kernel/block.rs
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Types for working with the block layer.
+
+pub mod mq;
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
new file mode 100644
index 000000000000..fb0f393c1cea
--- /dev/null
+++ b/rust/kernel/block/mq.rs
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides types for implementing block drivers that interface
+//! with the blk-mq subsystem.
+//!
+//! To implement a block device driver, a Rust module must do the following:
+//!
+//! - Implement [`Operations`] for a type `T`.
+//! - Create a [`TagSet<T>`].
+//! - Create a [`GenDisk<T>`] via the [`GenDiskBuilder`].
+//! - Add the disk to the system by calling [`GenDiskBuilder::build`], passing
+//! in the `TagSet` reference.
+//!
+//! The types available in this module that have direct C counterparts are:
+//!
+//! - The [`TagSet`] type that abstracts the C type `struct blk_mq_tag_set`.
+//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`.
+//! - The [`Request`] type that abstracts the C type `struct request`.
+//!
+//! The kernel will interface with the block device driver by calling the method
+//! implementations of the `Operations` trait.
+//!
+//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`]
+//! instances. The `Request` type is a wrapper around the C `struct request`.
+//! The driver must mark the end of processing by calling one of the
+//! `Request::end` methods. Failure to do so can lead to deadlock or timeout
+//! errors. Please note that the C function `blk_mq_start_request` is implicitly
+//! called when the request is queued with the driver.
+//!
+//! The `TagSet` is responsible for creating and maintaining a mapping between
+//! `Request`s and integer ids as well as carrying a pointer to the vtable
+//! generated by `Operations`. This mapping is useful for associating
+//! completions from hardware with the correct `Request` instance. The `TagSet`
+//! determines the maximum queue depth by setting the number of `Request`
+//! instances available to the driver, and it determines the number of queues to
+//! instantiate for the driver. If possible, a driver should allocate one queue
+//! per core, to keep queue data local to a core.
+//!
+//! One `TagSet` instance can be shared between multiple `GenDisk` instances.
+//! This can be useful when implementing drivers where one piece of hardware
+//! with one set of IO resources is represented to the user as multiple disks.
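+//!
+//! For instance, two disks backed by the same hardware queues could be created
+//! like this (a hypothetical sketch; `tagset` is an `Arc<TagSet<MyBlkDevice>>`
+//! as in the example below):
+//!
+//! ```ignore
+//! let disk_a = gen_disk::GenDiskBuilder::new()
+//!     .build(format_args!("myblk_a"), tagset.clone())?;
+//! let disk_b = gen_disk::GenDiskBuilder::new()
+//!     .build(format_args!("myblk_b"), tagset)?;
+//! ```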
+//!
+//! One significant difference between block device drivers implemented with
+//! these Rust abstractions and drivers implemented in C is that the Rust
+//! drivers have to own a reference count on the `Request` type when the IO is
+//! in flight. This is to ensure that the C `struct request` instances backing
+//! the Rust `Request` instances are live while the Rust driver holds a
+//! reference to the `Request`. In addition, the conversion of an integer tag to
+//! a `Request` via the `TagSet` would not be sound without this bookkeeping.
+//!
+//! [`GenDisk`]: gen_disk::GenDisk
+//! [`GenDisk<T>`]: gen_disk::GenDisk
+//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder
+//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build
+//!
+//! # Example
+//!
+//! ```rust
+//! use kernel::{
+//! alloc::flags,
+//! block::mq::*,
+//! new_mutex,
+//! prelude::*,
+//! sync::{Arc, Mutex},
+//! types::{ARef, ForeignOwnable},
+//! };
+//!
+//! struct MyBlkDevice;
+//!
+//! #[vtable]
+//! impl Operations for MyBlkDevice {
+//!
+//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+//! Request::end_ok(rq).map_err(|_| EIO)?;
+//! Ok(())
+//! }
+//!
+//! fn commit_rqs() {}
+//! }
+//!
+//! let tagset: Arc<TagSet<MyBlkDevice>> =
+//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+//! let mut disk = gen_disk::GenDiskBuilder::new()
+//! .capacity_sectors(4096)
+//! .build(format_args!("myblk"), tagset)?;
+//!
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+
+pub mod gen_disk;
+mod operations;
+mod raw_writer;
+mod request;
+mod tag_set;
+
+pub use operations::Operations;
+pub use request::Request;
+pub use tag_set::TagSet;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
new file mode 100644
index 000000000000..f548a6199847
--- /dev/null
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic disk abstraction.
+//!
+//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
+use crate::error;
+use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
+use core::fmt::{self, Write};
+
+/// A builder for [`GenDisk`].
+///
+/// Use this struct to configure and add a new [`GenDisk`] to the VFS.
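+///
+/// A minimal usage sketch (assuming a driver type `MyBlkDevice` implementing
+/// [`Operations`] and an existing `tagset: Arc<TagSet<MyBlkDevice>>`):
+///
+/// ```ignore
+/// let disk = GenDiskBuilder::new()
+///     .rotational(false)
+///     .logical_block_size(512)?
+///     .physical_block_size(4096)?
+///     .capacity_sectors(204800) // 100 MiB of 512-byte sectors
+///     .build(format_args!("myblk"), tagset)?;
+/// ```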
+pub struct GenDiskBuilder {
+ rotational: bool,
+ logical_block_size: u32,
+ physical_block_size: u32,
+ capacity_sectors: u64,
+}
+
+impl Default for GenDiskBuilder {
+ fn default() -> Self {
+ Self {
+ rotational: false,
+ logical_block_size: bindings::PAGE_SIZE as u32,
+ physical_block_size: bindings::PAGE_SIZE as u32,
+ capacity_sectors: 0,
+ }
+ }
+}
+
+impl GenDiskBuilder {
+ /// Create a new instance.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Set the rotational media attribute for the device to be built.
+ pub fn rotational(mut self, rotational: bool) -> Self {
+ self.rotational = rotational;
+ self
+ }
+
+ /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
+ /// and that it is a power of two.
+ fn validate_block_size(size: u32) -> Result<()> {
+ if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
+ Err(error::code::EINVAL)
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Set the logical block size of the device to be built.
+ ///
+/// This method will check that the block size is a power of two and between
+/// 512 and `PAGE_SIZE`. If not, an error is returned and the block size is
+/// not set.
+ ///
+ /// This is the smallest unit the storage device can address. It is
+ /// typically 4096 bytes.
+ pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> {
+ Self::validate_block_size(block_size)?;
+ self.logical_block_size = block_size;
+ Ok(self)
+ }
+
+ /// Set the physical block size of the device to be built.
+ ///
+/// This method will check that the block size is a power of two and between
+/// 512 and `PAGE_SIZE`. If not, an error is returned and the block size is
+/// not set.
+ ///
+ /// This is the smallest unit a physical storage device can write
+ /// atomically. It is usually the same as the logical block size but may be
+ /// bigger. One example is SATA drives with 4096 byte physical block size
+ /// that expose a 512 byte logical block size to the operating system.
+ pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> {
+ Self::validate_block_size(block_size)?;
+ self.physical_block_size = block_size;
+ Ok(self)
+ }
+
+ /// Set the capacity of the device to be built, in sectors (512 bytes).
+ pub fn capacity_sectors(mut self, capacity: u64) -> Self {
+ self.capacity_sectors = capacity;
+ self
+ }
+
+ /// Build a new `GenDisk` and add it to the VFS.
+ pub fn build<T: Operations>(
+ self,
+ name: fmt::Arguments<'_>,
+ tagset: Arc<TagSet<T>>,
+ ) -> Result<GenDisk<T>> {
+ let lock_class_key = crate::sync::LockClassKey::new();
+
+ // SAFETY: `bindings::queue_limits` contains only fields that are valid when zeroed.
+ let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() };
+
+ lim.logical_block_size = self.logical_block_size;
+ lim.physical_block_size = self.physical_block_size;
+ if self.rotational {
+ lim.features = bindings::BLK_FEAT_ROTATIONAL;
+ }
+
+ // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set.
+ let gendisk = from_err_ptr(unsafe {
+ bindings::__blk_mq_alloc_disk(
+ tagset.raw_tag_set(),
+ &mut lim,
+ core::ptr::null_mut(),
+ lock_class_key.as_ptr(),
+ )
+ })?;
+
+ const TABLE: bindings::block_device_operations = bindings::block_device_operations {
+ submit_bio: None,
+ open: None,
+ release: None,
+ ioctl: None,
+ compat_ioctl: None,
+ check_events: None,
+ unlock_native_capacity: None,
+ getgeo: None,
+ set_read_only: None,
+ swap_slot_free_notify: None,
+ report_zones: None,
+ devnode: None,
+ alternative_gpt_sector: None,
+ get_unique_id: None,
+ // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
+ // be merged (unstable in rustc 1.78 which is staged for linux 6.10)
+ // https://github.com/rust-lang/rust/issues/119618
+ owner: core::ptr::null_mut(),
+ pr_ops: core::ptr::null_mut(),
+ free_disk: None,
+ poll_bio: None,
+ };
+
+ // SAFETY: `gendisk` is a valid pointer as we initialized it above
+ unsafe { (*gendisk).fops = &TABLE };
+
+ let mut raw_writer = RawWriter::from_array(
+ // SAFETY: `gendisk` points to a valid and initialized instance. We
+ // have exclusive access, since the disk is not added to the VFS
+ // yet.
+ unsafe { &mut (*gendisk).disk_name },
+ )?;
+ raw_writer.write_fmt(name)?;
+ raw_writer.write_char('\0')?;
+
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`. `set_capacity` takes a lock to synchronize this
+ // operation, so we will not race.
+ unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };
+
+ crate::error::to_result(
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`.
+ unsafe {
+ bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
+ },
+ )?;
+
+ // INVARIANT: `gendisk` was initialized above.
+ // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
+ Ok(GenDisk {
+ _tagset: tagset,
+ gendisk,
+ })
+ }
+}
+
+/// A generic block device.
+///
+/// # Invariants
+///
+/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
+/// - `gendisk` was added to the VFS through a call to
+/// `bindings::device_add_disk`.
+pub struct GenDisk<T: Operations> {
+ _tagset: Arc<TagSet<T>>,
+ gendisk: *mut bindings::gendisk,
+}
+
+// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
+// `TagSet`. It is safe to send this to other threads as long as `T` is `Send`.
+unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
+
+impl<T: Operations> Drop for GenDisk<T> {
+ fn drop(&mut self) {
+ // SAFETY: By type invariant, `self.gendisk` points to a valid and
+ // initialized instance of `struct gendisk`, and it was previously added
+ // to the VFS.
+ unsafe { bindings::del_gendisk(self.gendisk) };
+ }
+}
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
new file mode 100644
index 000000000000..9ba7fdfeb4b2
--- /dev/null
+++ b/rust/kernel/block/mq/operations.rs
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides an interface for blk-mq drivers to implement.
+//!
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::{
+ bindings,
+ block::mq::request::RequestDataWrapper,
+ block::mq::Request,
+ error::{from_result, Result},
+ types::ARef,
+};
+use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering};
+
+/// Implement this trait to interface with blk-mq as a block device driver.
+///
+/// To implement a block device driver, implement this trait as described in the
+/// [module level documentation]. The kernel will use the implementation of the
+/// functions defined in this trait to interface with a block device driver.
+/// Note: There is no need for an `exit_request()` implementation, because the
+/// `drop` implementation of the [`Request`] type will be invoked automatically
+/// by the C/Rust glue logic.
+///
+/// [module level documentation]: kernel::block::mq
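+///
+/// A minimal sketch of an implementation for a device that completes requests
+/// immediately and supports polled IO (hypothetical; `MyPollDevice` is not part
+/// of this patch):
+///
+/// ```ignore
+/// #[vtable]
+/// impl Operations for MyPollDevice {
+///     fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+///         // The request was never cloned, so ending it here cannot fail.
+///         Request::end_ok(rq).map_err(|_| EIO)?;
+///         Ok(())
+///     }
+///
+///     fn commit_rqs() {}
+///
+///     fn poll() -> bool {
+///         // Report whether any completions were processed.
+///         true
+///     }
+/// }
+/// ```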
+#[macros::vtable]
+pub trait Operations: Sized {
+ /// Called by the kernel to queue a request with the driver. If `is_last` is
+ /// `false`, the driver is allowed to defer committing the request.
+ fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result;
+
+ /// Called by the kernel to indicate that queued requests should be submitted.
+ fn commit_rqs();
+
+ /// Called by the kernel to poll the device for completed requests. Only
+ /// used for poll queues.
+ fn poll() -> bool {
+ crate::build_error(crate::error::VTABLE_DEFAULT_ERROR)
+ }
+}
+
+/// A vtable for blk-mq to interact with a block device driver.
+///
+/// A `bindings::blk_mq_ops` vtable is constructed from pointers to the `extern
+/// "C"` functions of this struct, exposed through the `OperationsVTable::VTABLE`.
+///
+/// For general documentation of these methods, see the kernel source
+/// documentation related to `struct blk_mq_ops` in
+/// [`include/linux/blk-mq.h`].
+///
+/// [`include/linux/blk-mq.h`]: srctree/include/linux/blk-mq.h
+pub(crate) struct OperationsVTable<T: Operations>(PhantomData<T>);
+
+impl<T: Operations> OperationsVTable<T> {
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - The caller of this function must ensure that the pointee of `bd` is
+ /// valid for reads for the duration of this function.
+ /// - This function must be called for an initialized and live `hctx`. That
+ /// is, `Self::init_hctx_callback` was called and
+ /// `Self::exit_hctx_callback()` was not yet called.
+ /// - `(*bd).rq` must point to an initialized and live `bindings::request`.
+ /// That is, `Self::init_request_callback` was called but
+ /// `Self::exit_request_callback` was not yet called for the request.
+ /// - `(*bd).rq` must be owned by the driver. That is, the block layer must
+ /// promise to not access the request until the driver calls
+ /// `bindings::blk_mq_end_request` for the request.
+ unsafe extern "C" fn queue_rq_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ bd: *const bindings::blk_mq_queue_data,
+ ) -> bindings::blk_status_t {
+ // SAFETY: `bd.rq` is valid as required by the safety requirement for
+ // this function.
+ let request = unsafe { &*(*bd).rq.cast::<Request<T>>() };
+
+ // One refcount for the ARef, one for being in flight
+ request.wrapper_ref().refcount().store(2, Ordering::Relaxed);
+
+ // SAFETY:
+ // - We own a refcount that we took above. We pass that to `ARef`.
+ // - By the safety requirements of this function, `request` is a valid
+ // `struct request` and the private data is properly initialized.
+ // - `rq` will be alive until `blk_mq_end_request` is called and is
+ // reference counted by `ARef` until then.
+ let rq = unsafe { Request::aref_from_raw((*bd).rq) };
+
+ // SAFETY: We have exclusive access and we just set the refcount above.
+ unsafe { Request::start_unchecked(&rq) };
+
+ let ret = T::queue_rq(
+ rq,
+ // SAFETY: `bd` is valid as required by the safety requirement for
+ // this function.
+ unsafe { (*bd).last },
+ );
+
+ if let Err(e) = ret {
+ e.to_blk_status()
+ } else {
+ bindings::BLK_STS_OK as _
+ }
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) {
+ T::commit_rqs()
+ }
+
+ /// This function is called by the C kernel. It is not currently
+ /// implemented, and there is no way to exercise this code path.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {}
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn poll_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ _iob: *mut bindings::io_comp_batch,
+ ) -> core::ffi::c_int {
+ T::poll().into()
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure. This
+ /// function may only be called once before `exit_hctx_callback` is called
+ /// for the same context.
+ unsafe extern "C" fn init_hctx_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ _tagset_data: *mut core::ffi::c_void,
+ _hctx_idx: core::ffi::c_uint,
+ ) -> core::ffi::c_int {
+ from_result(|| Ok(0))
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn exit_hctx_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ _hctx_idx: core::ffi::c_uint,
+ ) {
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called by blk-mq C infrastructure.
+ /// - `_set` must point to an initialized `TagSet<T>`.
+ /// - `rq` must point to an initialized `bindings::request`.
+ /// - The allocation pointed to by `rq` must be at least the size of `Request`
+ /// plus the size of `RequestDataWrapper`.
+ unsafe extern "C" fn init_request_callback(
+ _set: *mut bindings::blk_mq_tag_set,
+ rq: *mut bindings::request,
+ _hctx_idx: core::ffi::c_uint,
+ _numa_node: core::ffi::c_uint,
+ ) -> core::ffi::c_int {
+ from_result(|| {
+ // SAFETY: By the safety requirements of this function, `rq` points
+ // to a valid allocation.
+ let pdu = unsafe { Request::wrapper_ptr(rq.cast::<Request<T>>()) };
+
+ // SAFETY: The refcount field is allocated but not initialized, so
+ // it is valid for writes.
+ unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) };
+
+ Ok(0)
+ })
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called by blk-mq C infrastructure.
+ /// - `_set` must point to an initialized `TagSet<T>`.
+ /// - `rq` must point to an initialized and valid `Request`.
+ unsafe extern "C" fn exit_request_callback(
+ _set: *mut bindings::blk_mq_tag_set,
+ rq: *mut bindings::request,
+ _hctx_idx: core::ffi::c_uint,
+ ) {
+ // SAFETY: The tagset invariants guarantee that all requests are allocated with extra memory
+ // for the request data.
+ let pdu = unsafe { bindings::blk_mq_rq_to_pdu(rq) }.cast::<RequestDataWrapper>();
+
+ // SAFETY: `pdu` is valid for read and write and is properly initialised.
+ unsafe { core::ptr::drop_in_place(pdu) };
+ }
+
+ const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops {
+ queue_rq: Some(Self::queue_rq_callback),
+ queue_rqs: None,
+ commit_rqs: Some(Self::commit_rqs_callback),
+ get_budget: None,
+ put_budget: None,
+ set_rq_budget_token: None,
+ get_rq_budget_token: None,
+ timeout: None,
+ poll: if T::HAS_POLL {
+ Some(Self::poll_callback)
+ } else {
+ None
+ },
+ complete: Some(Self::complete_callback),
+ init_hctx: Some(Self::init_hctx_callback),
+ exit_hctx: Some(Self::exit_hctx_callback),
+ init_request: Some(Self::init_request_callback),
+ exit_request: Some(Self::exit_request_callback),
+ cleanup_rq: None,
+ busy: None,
+ map_queues: None,
+ #[cfg(CONFIG_BLK_DEBUG_FS)]
+ show_rq: None,
+ };
+
+ pub(crate) const fn build() -> &'static bindings::blk_mq_ops {
+ &Self::VTABLE
+ }
+}
diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs
new file mode 100644
index 000000000000..9222465d670b
--- /dev/null
+++ b/rust/kernel/block/mq/raw_writer.rs
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::fmt::{self, Write};
+
+use crate::error::Result;
+use crate::prelude::EINVAL;
+
+/// A mutable reference to a byte buffer into which a string can be written.
+///
+/// # Invariants
+///
+/// `buffer` is always null terminated.
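+///
+/// # Example
+///
+/// A sketch of the intended use, mirroring `GenDiskBuilder::build` (`buf` is a
+/// hypothetical `[core::ffi::c_char; 32]`):
+///
+/// ```ignore
+/// let mut writer = RawWriter::from_array(&mut buf)?;
+/// writer.write_fmt(format_args!("myblk{}", 0))?;
+/// writer.write_char('\0')?;
+/// ```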
+pub(crate) struct RawWriter<'a> {
+ buffer: &'a mut [u8],
+ pos: usize,
+}
+
+impl<'a> RawWriter<'a> {
+ /// Create a new `RawWriter` instance.
+ fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> {
+ *(buffer.last_mut().ok_or(EINVAL)?) = 0;
+
+ // INVARIANT: We null terminated the buffer above.
+ Ok(Self { buffer, pos: 0 })
+ }
+
+ pub(crate) fn from_array<const N: usize>(
+ a: &'a mut [core::ffi::c_char; N],
+ ) -> Result<RawWriter<'a>> {
+ Self::new(
+ // SAFETY: the buffer of `a` is valid for read and write as `u8` for
+ // at least `N` bytes.
+ unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) },
+ )
+ }
+}
+
+impl Write for RawWriter<'_> {
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ let bytes = s.as_bytes();
+ let len = bytes.len();
+
+ // We do not want to overwrite our null terminator
+ if self.pos + len > self.buffer.len() - 1 {
+ return Err(fmt::Error);
+ }
+
+ // INVARIANT: We are not overwriting the last byte
+ self.buffer[self.pos..self.pos + len].copy_from_slice(bytes);
+
+ self.pos += len;
+
+ Ok(())
+ }
+}
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
new file mode 100644
index 000000000000..a0e22827f3f4
--- /dev/null
+++ b/rust/kernel/block/mq/request.rs
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides a wrapper for the C `struct request` type.
+//!
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::{
+ bindings,
+ block::mq::Operations,
+ error::Result,
+ types::{ARef, AlwaysRefCounted, Opaque},
+};
+use core::{
+ marker::PhantomData,
+ ptr::{addr_of_mut, NonNull},
+ sync::atomic::{AtomicU64, Ordering},
+};
+
+/// A wrapper around a blk-mq `struct request`. This represents an IO request.
+///
+/// # Implementation details
+///
+/// There are four states for a request that the Rust bindings care about:
+///
+/// A) Request is owned by block layer (refcount 0)
+/// B) Request is owned by driver but with zero `ARef`s in existence
+/// (refcount 1)
+/// C) Request is owned by driver with exactly one `ARef` in existence
+/// (refcount 2)
+/// D) Request is owned by driver with more than one `ARef` in existence
+/// (refcount > 2)
+///
+/// We need to track A and B to ensure we fail tag-to-request conversions for
+/// requests that are not owned by the driver.
+///
+/// We need to track C and D to ensure that it is safe to end the request and hand
+/// back ownership to the block layer.
+///
+/// The states are tracked through the private `refcount` field of
+/// `RequestDataWrapper`. This structure lives in the private data area of the C
+/// `struct request`.
+///
+/// # Invariants
+///
+/// * `self.0` is a valid `struct request` created by the C portion of the kernel.
+/// * The private data area associated with this request must be an initialized
+/// and valid `RequestDataWrapper<T>`.
+/// * `self` is reference counted by atomic modification of
+/// `self.wrapper_ref().refcount()`.
+///
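+/// For example, ending a request fails while a second `ARef` to it exists
+/// (a hypothetical sketch; `rq` is the `ARef<Request<T>>` passed to
+/// [`Operations::queue_rq`]):
+///
+/// ```ignore
+/// let extra = rq.clone(); // refcount 2 -> 3 (state D)
+/// let rq = Request::end_ok(rq).unwrap_err(); // refused: `extra` still exists
+/// drop(extra); // refcount back to 2 (state C)
+/// assert!(Request::end_ok(rq).is_ok()); // now the request can be ended
+/// ```
+///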
+#[repr(transparent)]
+pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>);
+
+impl<T: Operations> Request<T> {
+ /// Create an `ARef<Request>` from a `struct request` pointer.
+ ///
+ /// # Safety
+ ///
+ /// * The caller must own a refcount on `ptr` that is transferred to the
+ /// returned `ARef`.
+ /// * The type invariants for `Request` must hold for the pointee of `ptr`.
+ pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> {
+ // INVARIANT: By the safety requirements of this function, invariants are upheld.
+ // SAFETY: By the safety requirement of this function, we own a
+ // reference count that we can pass to `ARef`.
+ unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) }
+ }
+
+ /// Notify the block layer that a request is going to be processed now.
+ ///
+ /// The block layer uses this hook to do proper initializations such as
+ /// starting the timeout timer. It is a requirement that block device
+ /// drivers call this function when starting to process a request.
+ ///
+ /// # Safety
+ ///
+ /// The caller must have exclusive ownership of `self`, that is
+ /// `self.wrapper_ref().refcount() == 2`.
+ pub(crate) unsafe fn start_unchecked(this: &ARef<Self>) {
+ // SAFETY: By type invariant, `self.0` is a valid `struct request` and
+ // we have exclusive access.
+ unsafe { bindings::blk_mq_start_request(this.0.get()) };
+ }
+
+ /// Try to take exclusive ownership of `this` by dropping the refcount to 0.
+ /// This fails if `this` is not the only `ARef` pointing to the underlying
+ /// `Request`.
+ ///
+ /// If the operation is successful, `Ok` is returned with a pointer to the
+ /// C `struct request`. If the operation fails, `this` is returned in the
+ /// `Err` variant.
+ fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> {
+ // We can race with `TagSet::tag_to_rq`
+ if let Err(_old) = this.wrapper_ref().refcount().compare_exchange(
+ 2,
+ 0,
+ Ordering::Relaxed,
+ Ordering::Relaxed,
+ ) {
+ return Err(this);
+ }
+
+ let request_ptr = this.0.get();
+ core::mem::forget(this);
+
+ Ok(request_ptr)
+ }
+
+ /// Notify the block layer that the request has been completed without errors.
+ ///
+ /// This function will return `Err` if `this` is not the only `ARef`
+ /// referencing the request.
+ pub fn end_ok(this: ARef<Self>) -> Result<(), ARef<Self>> {
+ let request_ptr = Self::try_set_end(this)?;
+
+ // SAFETY: By type invariant, `this.0` was a valid `struct request`. The
+ // success of the call to `try_set_end` guarantees that there are no
+ // `ARef`s pointing to this request. Therefore it is safe to hand it
+ // back to the block layer.
+ unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) };
+
+ Ok(())
+ }
+
+ /// Return a pointer to the `RequestDataWrapper` stored in the private area
+ /// of the request structure.
+ ///
+ /// # Safety
+ ///
+ /// - `this` must point to a valid allocation of at least the size of `Self`
+ /// plus the size of `RequestDataWrapper`.
+ pub(crate) unsafe fn wrapper_ptr(this: *mut Self) -> NonNull<RequestDataWrapper> {
+ let request_ptr = this.cast::<bindings::request>();
+ // SAFETY: By safety requirements for this function, `this` is a
+ // valid allocation.
+ let wrapper_ptr =
+ unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() };
+ // SAFETY: By C API contract, wrapper_ptr points to a valid allocation
+ // and is not null.
+ unsafe { NonNull::new_unchecked(wrapper_ptr) }
+ }
+
+ /// Return a reference to the `RequestDataWrapper` stored in the private
+ /// area of the request structure.
+ pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper {
+ // SAFETY: By type invariant, `self.0` is a valid allocation. Further,
+ // the private data associated with this request is initialized and
+ // valid. The existence of `&self` guarantees that the private data is
+ // valid as a shared reference.
+ unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() }
+ }
+}
+
+/// A wrapper around data stored in the private area of the C `struct request`.
+pub(crate) struct RequestDataWrapper {
+ /// The Rust request refcount has the following states:
+ ///
+ /// - 0: The request is owned by C block layer.
+ /// - 1: The request is owned by Rust abstractions but there are no ARef references to it.
+ /// - 2+: There are `ARef` references to the request.
+ refcount: AtomicU64,
+}
+
+impl RequestDataWrapper {
+ /// Return a reference to the refcount of the request that is embedding
+ /// `self`.
+ pub(crate) fn refcount(&self) -> &AtomicU64 {
+ &self.refcount
+ }
+
+ /// Return a pointer to the refcount of the request that is embedding the
+ /// pointee of `this`.
+ ///
+ /// # Safety
+ ///
+ /// - `this` must point to a live allocation of at least the size of `Self`.
+ pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 {
+ // SAFETY: Because of the safety requirements of this function, the
+ // field projection is safe.
+ unsafe { addr_of_mut!((*this).refcount) }
+ }
+}
+
+// SAFETY: Exclusive access is thread-safe for `Request`. `Request` has no `&mut
+// self` methods and `&self` methods that mutate `self` are internally
+// synchronized.
+unsafe impl<T: Operations> Send for Request<T> {}
+
+// SAFETY: Shared access is thread-safe for `Request`. `&self` methods that
+// mutate `self` are internally synchronized.
+unsafe impl<T: Operations> Sync for Request<T> {}
+
+/// Store the result of `op(target.load())` in `target`, returning the new
+/// value of `target`.
+fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 {
+ let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x)));
+
+ // SAFETY: Because the operation passed to `fetch_update` above always
+ // return `Some`, `old` will always be `Ok`.
+ let old = unsafe { old.unwrap_unchecked() };
+
+ op(old)
+}
+
+/// Store the result of `op(target.load())` in `target` if `target.load() !=
+/// pred`, returning `true` if the target was updated.
+fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool {
+ target
+ .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| {
+ if x == pred {
+ None
+ } else {
+ Some(op(x))
+ }
+ })
+ .is_ok()
+}
+
+// SAFETY: All instances of `Request<T>` are reference counted. This
+// implementation of `AlwaysRefCounted` ensures that increments to the refcount
+// keep the object alive in memory at least until a matching refcount
+// decrement is executed.
+unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
+ fn inc_ref(&self) {
+ let refcount = &self.wrapper_ref().refcount();
+
+ #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
+ let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0);
+
+ #[cfg(CONFIG_DEBUG_MISC)]
+ if !updated {
+ panic!("Request refcount zero on clone")
+ }
+ }
+
+ unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
+ // SAFETY: The type invariants of `ARef` guarantee that `obj` is valid
+ // for read.
+ let wrapper_ptr = unsafe { Self::wrapper_ptr(obj.as_ptr()).as_ptr() };
+ // SAFETY: The type invariant of `Request` guarantees that the private
+ // data area is initialized and valid.
+ let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };
+
+ #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
+ let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1);
+
+ #[cfg(CONFIG_DEBUG_MISC)]
+ if new_refcount == 0 {
+ panic!("Request reached refcount zero in Rust abstractions");
+ }
+ }
+}
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
new file mode 100644
index 000000000000..f9a1ca655a35
--- /dev/null
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides the `TagSet` struct to wrap the C `struct blk_mq_tag_set`.
+//!
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use core::pin::Pin;
+
+use crate::{
+ bindings,
+ block::mq::{operations::OperationsVTable, request::RequestDataWrapper, Operations},
+ error,
+ prelude::PinInit,
+ try_pin_init,
+ types::Opaque,
+};
+use core::{convert::TryInto, marker::PhantomData};
+use macros::{pin_data, pinned_drop};
+
+/// A wrapper for the C `struct blk_mq_tag_set`.
+///
+/// `struct blk_mq_tag_set` contains a `struct list_head` and so must be pinned.
+///
+/// # Invariants
+///
+/// - `inner` is initialized and valid.
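+///
+/// A `TagSet` must be initialized in-place behind a pinning pointer, for
+/// example (a sketch mirroring the `kernel::block::mq` module example, with a
+/// hypothetical driver type `MyBlkDevice`):
+///
+/// ```ignore
+/// let tagset: Arc<TagSet<MyBlkDevice>> =
+///     Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+/// ```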
+#[pin_data(PinnedDrop)]
+#[repr(transparent)]
+pub struct TagSet<T: Operations> {
+ #[pin]
+ inner: Opaque<bindings::blk_mq_tag_set>,
+ _p: PhantomData<T>,
+}
+
+impl<T: Operations> TagSet<T> {
+ /// Try to create a new tag set.
+ pub fn new(
+ nr_hw_queues: u32,
+ num_tags: u32,
+ num_maps: u32,
+ ) -> impl PinInit<Self, error::Error> {
+ // SAFETY: `blk_mq_tag_set` only contains integers and pointers, which
+ // all are allowed to be 0.
+ let tag_set: bindings::blk_mq_tag_set = unsafe { core::mem::zeroed() };
+ let tag_set = core::mem::size_of::<RequestDataWrapper>()
+ .try_into()
+ .map(|cmd_size| {
+ bindings::blk_mq_tag_set {
+ ops: OperationsVTable::<T>::build(),
+ nr_hw_queues,
+ timeout: 0, // 0 means the default, which is 30 seconds (30 * HZ) in C
+ numa_node: bindings::NUMA_NO_NODE,
+ queue_depth: num_tags,
+ cmd_size,
+ flags: bindings::BLK_MQ_F_SHOULD_MERGE,
+ driver_data: core::ptr::null_mut::<core::ffi::c_void>(),
+ nr_maps: num_maps,
+ ..tag_set
+ }
+ });
+
+ try_pin_init!(TagSet {
+ inner <- PinInit::<_, error::Error>::pin_chain(Opaque::new(tag_set?), |tag_set| {
+ // SAFETY: we do not move out of `tag_set`.
+ let tag_set = unsafe { Pin::get_unchecked_mut(tag_set) };
+ // SAFETY: `tag_set` is a reference to an initialized `blk_mq_tag_set`.
+ error::to_result(unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get()) })
+ }),
+ _p: PhantomData,
+ })
+ }
+
+ /// Return the pointer to the wrapped `struct blk_mq_tag_set`.
+ pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set {
+ self.inner.get()
+ }
+}
+
+#[pinned_drop]
+impl<T: Operations> PinnedDrop for TagSet<T> {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY: By type invariant `inner` is valid and has been properly
+ // initialized during construction.
+ unsafe { bindings::blk_mq_free_tag_set(self.inner.get()) };
+ }
+}
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index 55280ae9fe40..145f5c397009 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -126,6 +126,12 @@ impl Error {
self.0
}
+ #[cfg(CONFIG_BLOCK)]
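+ /// Converts the error into a `blk_status_t` for consumption by the block layer.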
+ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t {
+ // SAFETY: `self.0` is a valid error due to its invariant.
+ unsafe { bindings::errno_to_blk_status(self.0) }
+ }
+
/// Returns the error encoded as a pointer.
#[allow(dead_code)]
pub(crate) fn to_ptr<T>(self) -> *mut T {
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index fbd91a48ff8b..2cf7c6b6f66b 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -27,6 +27,8 @@ compile_error!("Missing kernel configuration for conditional compilation");
extern crate self as kernel;
pub mod alloc;
+#[cfg(CONFIG_BLOCK)]
+pub mod block;
mod build_assert;
pub mod error;
pub mod init;
diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst
index 67956f6496a5..9d920419a62c 100644
--- a/scripts/Makefile.dtbinst
+++ b/scripts/Makefile.dtbinst
@@ -17,7 +17,7 @@ include $(srctree)/scripts/Kbuild.include
dst := $(INSTALL_DTBS_PATH)
quiet_cmd_dtb_install = INSTALL $@
- cmd_dtb_install = install -D $< $@
+ cmd_dtb_install = install -D -m 0644 $< $@
$(dst)/%: $(obj)/%
$(call cmd,dtb_install)
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index d35f55e0d141..e85be7721a48 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -146,7 +146,7 @@ $(call multi_depend, $(host-cxxmulti), , -objs -cxxobjs)
# Create .o file from a single .cc (C++) file
quiet_cmd_host-cxxobjs = HOSTCXX $@
cmd_host-cxxobjs = $(HOSTCXX) $(hostcxx_flags) -c -o $@ $<
-$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE
+$(host-cxxobjs): $(obj)/%.o: $(obj)/%.cc FORCE
$(call if_changed_dep,host-cxxobjs)
# Create executable from a single Rust crate (which may consist of
diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index 38653f3e8108..bf016af8bf8a 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -103,7 +103,7 @@ debian-orig: private version = $(shell dpkg-parsechangelog -S Version | sed 's/-
debian-orig: private orig-name = $(source)_$(version).orig.tar$(debian-orig-suffix)
debian-orig: mkdebian-opts = --need-source
debian-orig: linux.tar$(debian-orig-suffix) debian
- $(Q)if [ "$(df --output=target .. 2>/dev/null)" = "$(df --output=target $< 2>/dev/null)" ]; then \
+ $(Q)if [ "$$(df --output=target .. 2>/dev/null)" = "$$(df --output=target $< 2>/dev/null)" ]; then \
ln -f $< ../$(orig-name); \
else \
cp $< ../$(orig-name); \
diff --git a/scripts/gdb/linux/Makefile b/scripts/gdb/linux/Makefile
index fd1402c0a1a1..fcd32fcf3ae0 100644
--- a/scripts/gdb/linux/Makefile
+++ b/scripts/gdb/linux/Makefile
@@ -5,7 +5,7 @@ ifdef building_out_of_srctree
symlinks := $(patsubst $(src)/%,%,$(wildcard $(src)/*.py))
quiet_cmd_symlink = SYMLINK $@
- cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(src)/%,$@) $@
+ cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(abspath $(src))/%,$@) $@
always-y += $(symlinks)
$(addprefix $(obj)/, $(symlinks)): FORCE
diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
index a78b804b680c..b9513d224476 100755
--- a/scripts/ld-version.sh
+++ b/scripts/ld-version.sh
@@ -57,9 +57,11 @@ else
fi
fi
-# Some distributions append a package release number, as in 2.34-4.fc32
-# Trim the hyphen and any characters that follow.
-version=${version%-*}
+# There may be something after the version, such as a distribution's package
+# release number (like Fedora's "2.34-4.fc32") or punctuation (like LLD briefly
+# added before the "compatible with GNU linkers" string), so remove everything
+# after just numbers and periods.
+version=${version%%[!0-9.]*}
cversion=$(get_canonical_version $version)
min_cversion=$(get_canonical_version $min_version)
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
index e095eb1e290e..c52d517b9364 100644
--- a/scripts/package/kernel.spec
+++ b/scripts/package/kernel.spec
@@ -57,7 +57,8 @@ patch -p1 < %{SOURCE2}
%install
mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE}
cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz
-%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+# DEPMOD=true makes depmod a no-op. We do not package depmod-generated files.
+%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} DEPMOD=true modules_install
%{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE}
cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config
@@ -70,10 +71,7 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA
%endif
{
- for x in System.map config kernel modules.builtin \
- modules.builtin.modinfo modules.order vmlinuz; do
- echo "/lib/modules/%{KERNELRELEASE}/${x}"
- done
+ echo "/lib/modules/%{KERNELRELEASE}"
for x in alias alias.bin builtin.alias.bin builtin.bin dep dep.bin \
devname softdep symbols symbols.bin; do
@@ -85,7 +83,6 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA
done
if [ -d "%{buildroot}/lib/modules/%{KERNELRELEASE}/dtb" ];then
- echo "/lib/modules/%{KERNELRELEASE}/dtb"
find "%{buildroot}/lib/modules/%{KERNELRELEASE}/dtb" -printf "%%%ghost /boot/dtb-%{KERNELRELEASE}/%%P\n"
fi
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index effbf5982be1..2cff851ebfd7 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -255,21 +255,6 @@ config INIT_ON_FREE_DEFAULT_ON
touching "cold" memory areas. Most cases see 3-5% impact. Some
synthetic workloads have measured as high as 8%.
-config INIT_MLOCKED_ON_FREE_DEFAULT_ON
- bool "Enable mlocked memory zeroing on free"
- depends on !KMSAN
- help
- This config has the effect of setting "init_mlocked_on_free=1"
- on the kernel command line. If it is enabled, all mlocked process
- memory is zeroed when freed. This restriction to mlocked memory
- improves performance over "init_on_free" but can still be used to
- protect confidential data like key material from content exposures
- to other processes, as well as live forensics and cold boot attacks.
- Any non-mlocked memory is not cleared before it is reassigned. This
- configuration can be overwritten by setting "init_mlocked_on_free=0"
- on the command line. The "init_on_free" boot option takes
- precedence over "init_mlocked_on_free".
-
config CC_HAS_ZERO_CALL_USED_REGS
def_bool $(cc-option,-fzero-call-used-regs=used-gpr)
# https://github.com/ClangBuiltLinux/linux/issues/1766
diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index 45beb1c5f747..6b5181c668b5 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -217,7 +217,7 @@ void aa_audit_rule_free(void *vrule)
}
}
-int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
+int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp)
{
struct aa_audit_rule *rule;
@@ -230,14 +230,14 @@ int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
return -EINVAL;
}
- rule = kzalloc(sizeof(struct aa_audit_rule), GFP_KERNEL);
+ rule = kzalloc(sizeof(struct aa_audit_rule), gfp);
if (!rule)
return -ENOMEM;
/* Currently rules are treated as coming from the root ns */
rule->label = aa_label_parse(&root_ns->unconfined->label, rulestr,
- GFP_KERNEL, true, false);
+ gfp, true, false);
if (IS_ERR(rule->label)) {
int err = PTR_ERR(rule->label);
aa_audit_rule_free(rule);
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index acbb03b9bd25..0c8cc86b417b 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -200,7 +200,7 @@ static inline int complain_error(int error)
}
void aa_audit_rule_free(void *vrule);
-int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule);
+int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp);
int aa_audit_rule_known(struct audit_krule *rule);
int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule);
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 3e568126cd48..c51e24d24d1e 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -546,7 +546,7 @@ static inline void ima_free_modsig(struct modsig *modsig)
#else
static inline int ima_filter_rule_init(u32 field, u32 op, char *rulestr,
- void **lsmrule)
+ void **lsmrule, gfp_t gfp)
{
return -EINVAL;
}
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index abdd22007ed8..e4a79a9b2d58 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -427,8 +427,6 @@ static void __init remove_securityfs_measurement_lists(struct dentry **lists)
kfree(lists);
}
-
- securityfs_measurement_list_count = 0;
}
static int __init create_securityfs_measurement_lists(void)
@@ -625,6 +623,7 @@ out:
securityfs_remove(binary_runtime_measurements);
remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists);
remove_securityfs_measurement_lists(binary_securityfs_measurement_lists);
+ securityfs_measurement_list_count = 0;
securityfs_remove(ima_symlink);
securityfs_remove(ima_dir);
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index c0556907c2e6..09da8e639239 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -401,7 +401,8 @@ static void ima_free_rule(struct ima_rule_entry *entry)
kfree(entry);
}
-static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
+static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry,
+ gfp_t gfp)
{
struct ima_rule_entry *nentry;
int i;
@@ -410,7 +411,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
* Immutable elements are copied over as pointers and data; only
* lsm rules can change
*/
- nentry = kmemdup(entry, sizeof(*nentry), GFP_KERNEL);
+ nentry = kmemdup(entry, sizeof(*nentry), gfp);
if (!nentry)
return NULL;
@@ -425,7 +426,8 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
ima_filter_rule_init(nentry->lsm[i].type, Audit_equal,
nentry->lsm[i].args_p,
- &nentry->lsm[i].rule);
+ &nentry->lsm[i].rule,
+ gfp);
if (!nentry->lsm[i].rule)
pr_warn("rule for LSM \'%s\' is undefined\n",
nentry->lsm[i].args_p);
@@ -438,7 +440,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry)
int i;
struct ima_rule_entry *nentry;
- nentry = ima_lsm_copy_rule(entry);
+ nentry = ima_lsm_copy_rule(entry, GFP_KERNEL);
if (!nentry)
return -ENOMEM;
@@ -664,7 +666,7 @@ retry:
}
if (rc == -ESTALE && !rule_reinitialized) {
- lsm_rule = ima_lsm_copy_rule(rule);
+ lsm_rule = ima_lsm_copy_rule(rule, GFP_ATOMIC);
if (lsm_rule) {
rule_reinitialized = true;
goto retry;
@@ -1140,7 +1142,8 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry,
entry->lsm[lsm_rule].type = audit_type;
result = ima_filter_rule_init(entry->lsm[lsm_rule].type, Audit_equal,
entry->lsm[lsm_rule].args_p,
- &entry->lsm[lsm_rule].rule);
+ &entry->lsm[lsm_rule].rule,
+ GFP_KERNEL);
if (!entry->lsm[lsm_rule].rule) {
pr_warn("rule for LSM \'%s\' is undefined\n",
entry->lsm[lsm_rule].args_p);
diff --git a/security/security.c b/security/security.c
index e5da848c50b9..e5ca08789f74 100644
--- a/security/security.c
+++ b/security/security.c
@@ -5332,15 +5332,17 @@ void security_key_post_create_or_update(struct key *keyring, struct key *key,
* @op: rule operator
* @rulestr: rule context
* @lsmrule: receive buffer for audit rule struct
+ * @gfp: GFP flag used for kmalloc
*
* Allocate and initialize an LSM audit rule structure.
*
* Return: Return 0 if @lsmrule has been successfully set, -EINVAL in case of
* an invalid rule.
*/
-int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule)
+int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule,
+ gfp_t gfp)
{
- return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule);
+ return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule, gfp);
}
/**
diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h
index 52aca71210b4..29c7d4c86f6d 100644
--- a/security/selinux/include/audit.h
+++ b/security/selinux/include/audit.h
@@ -21,12 +21,14 @@
* @op: the operator the rule uses
* @rulestr: the text "target" of the rule
* @rule: pointer to the new rule structure returned via this
+ * @gfp: GFP flag used for kmalloc
*
* Returns 0 if successful, -errno if not. On success, the rule structure
* will be allocated internally. The caller must free this structure with
* selinux_audit_rule_free() after use.
*/
-int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule);
+int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule,
+ gfp_t gfp);
/**
* selinux_audit_rule_free - free an selinux audit rule structure.
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index f20e1968b7f7..e33e55384b75 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -3507,7 +3507,8 @@ void selinux_audit_rule_free(void *vrule)
}
}
-int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
+int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule,
+ gfp_t gfp)
{
struct selinux_state *state = &selinux_state;
struct selinux_policy *policy;
@@ -3548,7 +3549,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
return -EINVAL;
}
- tmprule = kzalloc(sizeof(struct selinux_audit_rule), GFP_KERNEL);
+ tmprule = kzalloc(sizeof(struct selinux_audit_rule), gfp);
if (!tmprule)
return -ENOMEM;
context_init(&tmprule->au_ctxt);
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 70ba2841e181..f5cbec1e6a92 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4693,11 +4693,13 @@ static int smack_post_notification(const struct cred *w_cred,
* @op: required testing operator (=, !=, >, <, ...)
* @rulestr: smack label to be audited
* @vrule: pointer to save our own audit rule representation
+ * @gfp: type of the memory for the allocation
*
* Prepare to audit cases where (@field @op @rulestr) is true.
* The label to be audited is created if necessary.
*/
-static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
+static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule,
+ gfp_t gfp)
{
struct smack_known *skp;
char **rule = (char **)vrule;
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index b6684a074a59..39944a859ff6 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -111,6 +111,7 @@ static void report_access(const char *access, struct task_struct *target,
/**
* yama_relation_cleanup - remove invalid entries from the relation list
+ * @work: unused
*
*/
static void yama_relation_cleanup(struct work_struct *work)
diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
index 12aa1cef11a1..cc5db93b9132 100644
--- a/sound/core/pcm_dmaengine.c
+++ b/sound/core/pcm_dmaengine.c
@@ -349,6 +349,16 @@ int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream,
}
EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan);
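+/**
+ * snd_dmaengine_pcm_sync_stop - Synchronize after stopping the DMA transfer
+ * @substream: PCM substream
+ */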
+int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream)
+{
+ struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
+
+ dmaengine_synchronize(prtd->dma_chan);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_sync_stop);
+
/**
* snd_dmaengine_pcm_close - Close a dmaengine based PCM substream
* @substream: PCM substream
@@ -358,6 +368,12 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan);
int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream)
{
struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
+ struct dma_tx_state state;
+ enum dma_status status;
+
+ status = dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state);
+ if (status == DMA_PAUSED)
+ dmaengine_terminate_async(prtd->dma_chan);
dmaengine_synchronize(prtd->dma_chan);
kfree(prtd);
@@ -378,6 +394,12 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_close);
int snd_dmaengine_pcm_close_release_chan(struct snd_pcm_substream *substream)
{
struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
+ struct dma_tx_state state;
+ enum dma_status status;
+
+ status = dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state);
+ if (status == DMA_PAUSED)
+ dmaengine_terminate_async(prtd->dma_chan);
dmaengine_synchronize(prtd->dma_chan);
dma_release_channel(prtd->dma_chan);
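Both close paths above gained the same guard: a stream closed while paused leaves its channel in DMA_PAUSED, and that channel must be terminated before it is synchronized and released. A condensed sketch of the shared pattern (the helper name is hypothetical):

	/* Terminate a paused channel before synchronizing, so the close
	 * path never waits on a transfer that will not complete. */
	static void pcm_dma_quiesce(struct dma_chan *chan, dma_cookie_t cookie)
	{
		struct dma_tx_state state;

		if (dmaengine_tx_status(chan, cookie, &state) == DMA_PAUSED)
			dmaengine_terminate_async(chan);
		dmaengine_synchronize(chan);
	}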
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 521ba56392a0..c152ccf32214 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1775,6 +1775,8 @@ static int snd_pcm_pre_resume(struct snd_pcm_substream *substream,
snd_pcm_state_t state)
{
struct snd_pcm_runtime *runtime = substream->runtime;
+ if (runtime->state != SNDRV_PCM_STATE_SUSPENDED)
+ return -EBADFD;
if (!(runtime->info & SNDRV_PCM_INFO_RESUME))
return -ENOSYS;
runtime->trigger_master = substream;
diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index 171fb75267af..e90b27a135e6 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -791,7 +791,8 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event,
/* set up the MIDI2 RPN/NRPN packet data from the parsed info */
static void fill_rpn(struct snd_seq_ump_midi2_bank *cc,
- union snd_ump_midi2_msg *data)
+ union snd_ump_midi2_msg *data,
+ unsigned char channel)
{
if (cc->rpn_set) {
data->rpn.status = UMP_MSG_STATUS_RPN;
@@ -808,6 +809,7 @@ static void fill_rpn(struct snd_seq_ump_midi2_bank *cc,
}
data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) |
cc->cc_data_lsb);
+ data->rpn.channel = channel;
cc->cc_data_msb = cc->cc_data_lsb = 0;
}
@@ -855,7 +857,7 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event,
cc->cc_data_lsb = val;
if (!(cc->rpn_set || cc->nrpn_set))
return 0; // skip
- fill_rpn(cc, data);
+ fill_rpn(cc, data, channel);
return 1;
}
@@ -957,7 +959,7 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event,
cc->cc_data_lsb = lsb;
if (!(cc->rpn_set || cc->nrpn_set))
return 0; // skip
- fill_rpn(cc, data);
+ fill_rpn(cc, data, channel);
return 1;
}
@@ -1018,7 +1020,7 @@ static int system_2p_ev_to_ump_midi2(const struct snd_seq_event *event,
union snd_ump_midi2_msg *data,
unsigned char status)
{
- return system_1p_ev_to_ump_midi1(event, dest_port,
+ return system_2p_ev_to_ump_midi1(event, dest_port,
(union snd_ump_midi1_msg *)data,
status);
}
@@ -1075,6 +1077,8 @@ static const struct seq_ev_to_ump seq_ev_ump_encoders[] = {
system_ev_to_ump_midi1, system_ev_to_ump_midi2 },
{ SNDRV_SEQ_EVENT_SENSING, UMP_SYSTEM_STATUS_ACTIVE_SENSING,
system_ev_to_ump_midi1, system_ev_to_ump_midi2 },
+ { SNDRV_SEQ_EVENT_RESET, UMP_SYSTEM_STATUS_RESET,
+ system_ev_to_ump_midi1, system_ev_to_ump_midi2 },
};
static const struct seq_ev_to_ump *find_ump_encoder(int type)
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index 537863447358..478d2b50c571 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -18,7 +18,7 @@
static int dsp_driver;
module_param(dsp_driver, int, 0444);
-MODULE_PARM_DESC(dsp_driver, "Force the DSP driver for Intel DSP (0=auto, 1=legacy, 2=SST, 3=SOF)");
+MODULE_PARM_DESC(dsp_driver, "Force the DSP driver for Intel DSP (0=auto, 1=legacy, 2=SST, 3=SOF, 4=AVS)");
#define FLAG_SST BIT(0)
#define FLAG_SOF BIT(1)
diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c
index 164335d3c200..4b1baf4dd50e 100644
--- a/sound/oss/dmasound/dmasound_core.c
+++ b/sound/oss/dmasound/dmasound_core.c
@@ -204,6 +204,7 @@ module_param(numWriteBufs, int, 0);
static unsigned int writeBufSize = DEFAULT_BUFF_SIZE ; /* in bytes */
module_param(writeBufSize, int, 0);
+MODULE_DESCRIPTION("Atari/Amiga/Q40 core DMA sound driver");
MODULE_LICENSE("GPL");
static int sq_unit = -1;
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 0da625533afc..a3cf0725fc43 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -162,6 +162,7 @@ config SND_HDA_SCODEC_CS35L56_I2C
depends on ACPI || COMPILE_TEST
depends on SND_SOC
select FW_CS_DSP
+ imply SERIAL_MULTI_INSTANTIATE
select SND_HDA_GENERIC
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
@@ -178,6 +179,7 @@ config SND_HDA_SCODEC_CS35L56_SPI
depends on ACPI || COMPILE_TEST
depends on SND_SOC
select FW_CS_DSP
+ imply SERIAL_MULTI_INSTANTIATE
select SND_HDA_GENERIC
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index 6c49e5c6cd20..031703f010be 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -1495,7 +1495,7 @@ static void cs35l41_hda_unbind(struct device *dev, struct device *master, void *
if (comps[cs35l41->index].dev == dev) {
memset(&comps[cs35l41->index], 0, sizeof(*comps));
sleep_flags = lock_system_sleep();
- device_link_remove(&comps->codec->core.dev, cs35l41->dev);
+ device_link_remove(&cs35l41->codec->core.dev, cs35l41->dev);
unlock_system_sleep(sleep_flags);
}
}
@@ -2019,6 +2019,8 @@ void cs35l41_hda_remove(struct device *dev)
{
struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
+ component_del(cs35l41->dev, &cs35l41_hda_comp_ops);
+
pm_runtime_get_sync(cs35l41->dev);
pm_runtime_dont_use_autosuspend(cs35l41->dev);
pm_runtime_disable(cs35l41->dev);
@@ -2026,8 +2028,6 @@ void cs35l41_hda_remove(struct device *dev)
if (cs35l41->halo_initialized)
cs35l41_remove_dsp(cs35l41);
- component_del(cs35l41->dev, &cs35l41_hda_comp_ops);
-
acpi_dev_put(cs35l41->dacpi);
pm_runtime_put_noidle(cs35l41->dev);
diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c
index 6a7a6d486916..80c816922f78 100644
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
@@ -128,6 +128,10 @@ static const struct cs35l41_config cs35l41_config_table[] = {
{ "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38B7", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "17AA38C7", 4, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT }, 0, 2, -1, 1000, 4500, 24 },
+ { "17AA38C8", 4, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT }, 0, 2, -1, 1000, 4500, 24 },
+ { "17AA38F9", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
+ { "17AA38FA", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{}
};
@@ -529,6 +533,10 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CSC3551", "17AA38B5", generic_dsd_config },
{ "CSC3551", "17AA38B6", generic_dsd_config },
{ "CSC3551", "17AA38B7", generic_dsd_config },
+ { "CSC3551", "17AA38C7", generic_dsd_config },
+ { "CSC3551", "17AA38C8", generic_dsd_config },
+ { "CSC3551", "17AA38F9", generic_dsd_config },
+ { "CSC3551", "17AA38FA", generic_dsd_config },
{}
};
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 11b0570ff56d..e134ede6c5aa 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -735,6 +735,8 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void *
if (comps[cs35l56->index].dev == dev)
memset(&comps[cs35l56->index], 0, sizeof(*comps));
+ cs35l56->codec = NULL;
+
dev_dbg(cs35l56->base.dev, "Unbound\n");
}
@@ -840,6 +842,9 @@ static int cs35l56_hda_system_resume(struct device *dev)
cs35l56->suspended = false;
+ if (!cs35l56->codec)
+ return 0;
+
ret = cs35l56_is_fw_reload_needed(&cs35l56->base);
dev_dbg(cs35l56->base.dev, "fw_reload_needed: %d\n", ret);
if (ret > 0) {
@@ -1072,12 +1077,12 @@ void cs35l56_hda_remove(struct device *dev)
{
struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev);
+ component_del(cs35l56->base.dev, &cs35l56_hda_comp_ops);
+
pm_runtime_dont_use_autosuspend(cs35l56->base.dev);
pm_runtime_get_sync(cs35l56->base.dev);
pm_runtime_disable(cs35l56->base.dev);
- component_del(cs35l56->base.dev, &cs35l56_hda_comp_ops);
-
cs_dsp_remove(&cs35l56->cs_dsp);
kfree(cs35l56->system_name);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index aa76d1c88589..766f0b1d3e9d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -583,10 +583,14 @@ static void alc_shutup_pins(struct hda_codec *codec)
switch (codec->core.vendor_id) {
case 0x10ec0236:
case 0x10ec0256:
+ case 0x10ec0257:
case 0x19e58326:
case 0x10ec0283:
+ case 0x10ec0285:
case 0x10ec0286:
+ case 0x10ec0287:
case 0x10ec0288:
+ case 0x10ec0295:
case 0x10ec0298:
alc_headset_mic_no_shutup(codec);
break;
@@ -7520,6 +7524,8 @@ enum {
ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1,
ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318,
ALC256_FIXUP_CHROME_BOOK,
+ ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7,
+ ALC287_FIXUP_LENOVO_SSID_17AA3820,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -7559,6 +7565,21 @@ static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec,
__snd_hda_apply_fixup(codec, id, action, 0);
}
+/* Similar to above, the Lenovo Yoga Pro 7 14ARP8 PCI SSID matches the codec SSID of the
+ * Legion Y9000X 2022 IAH7. */
+static void alc287_fixup_lenovo_14arp8_legion_iah7(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ int id;
+
+ if (codec->core.subsystem_id == 0x17aa386e)
+ id = ALC287_FIXUP_CS35L41_I2C_2; /* Legion Y9000X 2022 IAH7 */
+ else
+ id = ALC285_FIXUP_SPEAKER2_TO_DAC1; /* Yoga Pro 7 14ARP8 */
+ __snd_hda_apply_fixup(codec, id, action, 0);
+}
+
/* Another hilarious PCI SSID conflict with Lenovo Legion Pro 7 16ARX8H (with
* TAS2781 codec) and Legion 7i 16IAX7 (with CS35L41 codec);
* we apply a corresponding fixup depending on the codec SSID instead
@@ -7576,6 +7597,20 @@ static void alc287_fixup_lenovo_legion_7(struct hda_codec *codec,
__snd_hda_apply_fixup(codec, id, action, 0);
}
+/* Yet more conflicting PCI SSID (17aa:3820) on two Lenovo models */
+static void alc287_fixup_lenovo_ssid_17aa3820(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ int id;
+
+ if (codec->core.subsystem_id == 0x17aa3820)
+ id = ALC269_FIXUP_ASPIRE_HEADSET_MIC; /* IdeaPad 330-17IKB 81DM */
+ else /* 0x17aa3802 */
+ id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* Yoga Duet 7 13ITL6 */
+ __snd_hda_apply_fixup(codec, id, action, 0);
+}
+
static const struct hda_fixup alc269_fixups[] = {
[ALC269_FIXUP_GPIO2] = {
.type = HDA_FIXUP_FUNC,
@@ -9658,6 +9693,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK,
},
+ [ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc287_fixup_lenovo_14arp8_legion_iah7,
+ },
[ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc287_fixup_yoga9_14iap7_bass_spk_pin,
@@ -9808,6 +9847,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC225_FIXUP_HEADSET_JACK
},
+ [ALC287_FIXUP_LENOVO_SSID_17AA3820] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc287_fixup_lenovo_ssid_17aa3820,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10010,6 +10053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+ SND_PCI_QUIRK(0x103c, 0x84a6, "HP 250 G7 Notebook PC", ALC269_FIXUP_HP_LINE1_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x84ae, "HP 15-db0403ng", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
@@ -10045,6 +10089,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87f1, "HP ProBook 630 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -10194,6 +10239,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c7b, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c7c, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c7d, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c7e, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c7f, "HP EliteBook 645 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c80, "HP EliteBook 645 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c81, "HP EliteBook 665 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c89, "HP ProBook 460 G11", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
@@ -10332,6 +10384,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE),
+ SND_PCI_QUIRK(0x10ec, 0x11bc, "VAIO VJFE-IL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
@@ -10429,6 +10482,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -10502,7 +10556,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7),
SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
- SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3820, "IdeaPad 330 / Yoga Duet 7", ALC287_FIXUP_LENOVO_SSID_17AA3820),
SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
@@ -10516,7 +10570,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3865, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3866, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
- SND_PCI_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7 / Yoga Pro 7 14ARP8", ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7),
SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7/7i", ALC287_FIXUP_LENOVO_LEGION_7),
SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10527,6 +10581,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
+ SND_PCI_QUIRK(0x17aa, 0x3891, "Lenovo Yoga Pro 7 14AHP9", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
@@ -10540,10 +10595,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x38be, "Yoga S980-14.5 proX YC Dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38bf, "Yoga S980-14.5 proX LX Dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38c3, "Y980 DUAL", ALC287_FIXUP_TAS2781_I2C),
+ SND_PCI_QUIRK(0x17aa, 0x38c7, "Thinkbook 13x Gen 4", ALC287_FIXUP_CS35L41_I2C_4),
+ SND_PCI_QUIRK(0x17aa, 0x38c8, "Thinkbook 13x Gen 4", ALC287_FIXUP_CS35L41_I2C_4),
SND_PCI_QUIRK(0x17aa, 0x38cb, "Y790 YG DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38d2, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x38d7, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN),
+ SND_PCI_QUIRK(0x17aa, 0x38f9, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
@@ -10581,6 +10640,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
+ SND_PCI_QUIRK(0x1c6c, 0x122a, "Positivo N14AP7", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS),
SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP),
@@ -10598,6 +10658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+ SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO),
SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
@@ -10605,7 +10666,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0xf111, 0x0005, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
#if 0
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c
index 75f7674c66ee..fdee6592c502 100644
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -777,11 +777,11 @@ static void tas2781_hda_remove(struct device *dev)
{
struct tas2781_hda *tas_hda = dev_get_drvdata(dev);
+ component_del(tas_hda->dev, &tas2781_hda_comp_ops);
+
pm_runtime_get_sync(tas_hda->dev);
pm_runtime_disable(tas_hda->dev);
- component_del(tas_hda->dev, &tas2781_hda_comp_ops);
-
pm_runtime_put_noidle(tas_hda->dev);
tasdevice_remove(tas_hda->priv);
diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c
index 60cbc881be6e..ef12f97ddc69 100644
--- a/sound/soc/amd/acp/acp-i2s.c
+++ b/sound/soc/amd/acp/acp-i2s.c
@@ -588,20 +588,12 @@ static int acp_i2s_probe(struct snd_soc_dai *dai)
{
struct device *dev = dai->component->dev;
struct acp_dev_data *adata = dev_get_drvdata(dev);
- struct acp_resource *rsrc = adata->rsrc;
- unsigned int val;
if (!adata->acp_base) {
dev_err(dev, "I2S base is NULL\n");
return -EINVAL;
}
- val = readl(adata->acp_base + rsrc->i2s_pin_cfg_offset);
- if (val != rsrc->i2s_mode) {
- dev_err(dev, "I2S Mode not supported val %x\n", val);
- return -EINVAL;
- }
-
return 0;
}
diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c
index ad320b29e87d..777b5a78d8a9 100644
--- a/sound/soc/amd/acp/acp-pci.c
+++ b/sound/soc/amd/acp/acp-pci.c
@@ -100,6 +100,7 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
ret = -EINVAL;
goto release_regions;
}
+ chip->flag = flag;
dmic_dev = platform_device_register_data(dev, "dmic-codec", PLATFORM_DEVID_NONE, NULL, 0);
if (IS_ERR(dmic_dev)) {
dev_err(dev, "failed to create DMIC device\n");
@@ -139,7 +140,6 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
}
}
- chip->flag = flag;
memset(&pdevinfo, 0, sizeof(pdevinfo));
pdevinfo.name = chip->name;
@@ -199,10 +199,12 @@ static int __maybe_unused snd_acp_resume(struct device *dev)
ret = acp_init(chip);
if (ret)
dev_err(dev, "ACP init failed\n");
- child = chip->chip_pdev->dev;
- adata = dev_get_drvdata(&child);
- if (adata)
- acp_enable_interrupts(adata);
+ if (chip->chip_pdev) {
+ child = chip->chip_pdev->dev;
+ adata = dev_get_drvdata(&child);
+ if (adata)
+ acp_enable_interrupts(adata);
+ }
return ret;
}
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 1760b5d42460..4e3a8ce690a4 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -283,6 +283,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "M5402RA"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M5602RA"),
+ }
+ },
{
.driver_data = &acp6x_card,
.matches = {
diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
index 6aed1ee443b4..ba314b279919 100644
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -473,19 +473,22 @@ static int atmel_classd_asoc_card_init(struct device *dev,
if (!dai_link)
return -ENOMEM;
- comp = devm_kzalloc(dev, sizeof(*comp), GFP_KERNEL);
+ comp = devm_kzalloc(dev, 2 * sizeof(*comp), GFP_KERNEL);
if (!comp)
return -ENOMEM;
- dai_link->cpus = comp;
+ dai_link->cpus = &comp[0];
dai_link->codecs = &snd_soc_dummy_dlc;
+ dai_link->platforms = &comp[1];
dai_link->num_cpus = 1;
dai_link->num_codecs = 1;
+ dai_link->num_platforms = 1;
dai_link->name = "CLASSD";
dai_link->stream_name = "CLASSD PCM";
dai_link->cpus->dai_name = dev_name(dev);
+ dai_link->platforms->name = dev_name(dev);
card->dai_link = dai_link;
card->num_links = 1;
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c
index 8af89a263594..30497152e02a 100644
--- a/sound/soc/codecs/cs35l56-shared.c
+++ b/sound/soc/codecs/cs35l56-shared.c
@@ -215,6 +215,10 @@ static const struct reg_sequence cs35l56_asp1_defaults[] = {
REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100),
REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018),
REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018),
+ REG_SEQ0(CS35L56_ASP1TX1_INPUT, 0x00000000),
+ REG_SEQ0(CS35L56_ASP1TX2_INPUT, 0x00000000),
+ REG_SEQ0(CS35L56_ASP1TX3_INPUT, 0x00000000),
+ REG_SEQ0(CS35L56_ASP1TX4_INPUT, 0x00000000),
};
/*
diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c
index 901b9dbcf585..d9ab003e166b 100644
--- a/sound/soc/codecs/cs42l43-jack.c
+++ b/sound/soc/codecs/cs42l43-jack.c
@@ -121,7 +121,7 @@ int cs42l43_set_jack(struct snd_soc_component *component,
priv->buttons[3] = 735;
}
- ret = cs42l43_find_index(priv, "cirrus,detect-us", 1000, &priv->detect_us,
+ ret = cs42l43_find_index(priv, "cirrus,detect-us", 50000, &priv->detect_us,
cs42l43_accdet_us, ARRAY_SIZE(cs42l43_accdet_us));
if (ret < 0)
goto error;
@@ -433,7 +433,7 @@ irqreturn_t cs42l43_button_press(int irq, void *data)
// Wait for 2 full cycles of comb filter to ensure good reading
queue_delayed_work(system_wq, &priv->button_press_work,
- msecs_to_jiffies(10));
+ msecs_to_jiffies(20));
return IRQ_HANDLED;
}
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index 03b539ba540f..6a4e42e5e35b 100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -857,12 +857,16 @@ static void es8326_jack_detect_handler(struct work_struct *work)
* set auto-check mode, then restart jack_detect_work after 400ms.
* Don't report jack status.
*/
- regmap_write(es8326->regmap, ES8326_INT_SOURCE,
- (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, 0x00);
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
+ regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x10, 0x00);
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
+ regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x10, 0x10);
+ usleep_range(50000, 70000);
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE,
+ (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f);
regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08);
queue_delayed_work(system_wq, &es8326->jack_detect_work,
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index cdb7ff7020e9..51187b1e0ed2 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -81,7 +81,7 @@ static const struct reg_sequence init_list[] = {
static const struct reg_sequence rt5650_init_list[] = {
{0xf6, 0x0100},
{RT5645_PWR_ANLG1, 0x02},
- {RT5645_IL_CMD3, 0x0018},
+ {RT5645_IL_CMD3, 0x6728},
};
static const struct reg_default rt5645_reg[] = {
@@ -3130,20 +3130,32 @@ static void rt5645_enable_push_button_irq(struct snd_soc_component *component,
bool enable)
{
struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+ int ret;
if (enable) {
snd_soc_dapm_force_enable_pin(dapm, "ADC L power");
snd_soc_dapm_force_enable_pin(dapm, "ADC R power");
snd_soc_dapm_sync(dapm);
+ snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2,
+ RT5645_EN_4BTN_IL_MASK | RT5645_RST_4BTN_IL_MASK,
+ RT5645_EN_4BTN_IL_EN | RT5645_RST_4BTN_IL_RST);
+ usleep_range(10000, 15000);
+ snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2,
+ RT5645_EN_4BTN_IL_MASK | RT5645_RST_4BTN_IL_MASK,
+ RT5645_EN_4BTN_IL_EN | RT5645_RST_4BTN_IL_NORM);
+ msleep(50);
+ ret = snd_soc_component_read(component, RT5645_INT_IRQ_ST);
+ pr_debug("%s read %x = %x\n", __func__, RT5645_INT_IRQ_ST,
+ snd_soc_component_read(component, RT5645_INT_IRQ_ST));
+ snd_soc_component_write(component, RT5645_INT_IRQ_ST, ret);
+ ret = snd_soc_component_read(component, RT5650_4BTN_IL_CMD1);
+ pr_debug("%s read %x = %x\n", __func__, RT5650_4BTN_IL_CMD1,
+ snd_soc_component_read(component, RT5650_4BTN_IL_CMD1));
+ snd_soc_component_write(component, RT5650_4BTN_IL_CMD1, ret);
snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD1, 0x3, 0x3);
snd_soc_component_update_bits(component,
RT5645_INT_IRQ_ST, 0x8, 0x8);
- snd_soc_component_update_bits(component,
- RT5650_4BTN_IL_CMD2, 0x8000, 0x8000);
- snd_soc_component_read(component, RT5650_4BTN_IL_CMD1);
- pr_debug("%s read %x = %x\n", __func__, RT5650_4BTN_IL_CMD1,
- snd_soc_component_read(component, RT5650_4BTN_IL_CMD1));
} else {
snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, 0x8000, 0x0);
snd_soc_component_update_bits(component, RT5645_INT_IRQ_ST, 0x8, 0x0);
diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h
index 90816b2c5489..bef74b29fd54 100644
--- a/sound/soc/codecs/rt5645.h
+++ b/sound/soc/codecs/rt5645.h
@@ -2011,6 +2011,12 @@
#define RT5645_ZCD_HP_DIS (0x0 << 15)
#define RT5645_ZCD_HP_EN (0x1 << 15)
+/* Buttons Inline Command Function 2 (0xe0) */
+#define RT5645_EN_4BTN_IL_MASK (0x1 << 15)
+#define RT5645_EN_4BTN_IL_EN (0x1 << 15)
+#define RT5645_RST_4BTN_IL_MASK (0x1 << 14)
+#define RT5645_RST_4BTN_IL_RST (0x0 << 14)
+#define RT5645_RST_4BTN_IL_NORM (0x1 << 14)
/* Codec Private Register definition */
/* DAC ADC Digital Volume (0x00) */
diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
index 8ca8bcd177ab..dfda6bb5c6f8 100644
--- a/sound/soc/codecs/rt711-sdw.c
+++ b/sound/soc/codecs/rt711-sdw.c
@@ -38,7 +38,9 @@ static bool rt711_readable_register(struct device *dev, unsigned int reg)
case 0x8300 ... 0x83ff:
case 0x9c00 ... 0x9cff:
case 0xb900 ... 0xb9ff:
+ case 0x752008:
case 0x752009:
+ case 0x75200b:
case 0x752011:
case 0x75201a:
case 0x752045:
diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index b33da2215ade..87354bb1564e 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -68,6 +68,7 @@ static bool rt722_sdca_mbq_readable_register(struct device *dev, unsigned int re
case 0x200007f:
case 0x2000082 ... 0x200008e:
case 0x2000090 ... 0x2000094:
+ case 0x3110000:
case 0x5300000 ... 0x5300002:
case 0x5400002:
case 0x5600000 ... 0x5600007:
@@ -125,6 +126,7 @@ static bool rt722_sdca_mbq_volatile_register(struct device *dev, unsigned int re
case 0x2000067:
case 0x2000084:
case 0x2000086:
+ case 0x3110000:
return true;
default:
return false;
@@ -350,7 +352,7 @@ static int rt722_sdca_interrupt_callback(struct sdw_slave *slave,
if (status->sdca_cascade && !rt722->disable_irq)
mod_delayed_work(system_power_efficient_wq,
- &rt722->jack_detect_work, msecs_to_jiffies(30));
+ &rt722->jack_detect_work, msecs_to_jiffies(280));
mutex_unlock(&rt722->disable_irq_lock);
diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index 5ddc0c2fe53f..eb67689dcd6e 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -559,6 +559,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
+ priv->pdev = pdev;
+
cpu_np = of_parse_phandle(np, "audio-cpu", 0);
/* Give a chance to old DT binding */
if (!cpu_np)
@@ -787,7 +789,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
}
/* Initialize sound card */
- priv->pdev = pdev;
priv->card.dev = &pdev->dev;
priv->card.owner = THIS_MODULE;
ret = snd_soc_of_parse_card_name(&priv->card, "model");
diff --git a/sound/soc/fsl/imx-pcm-dma.c b/sound/soc/fsl/imx-pcm-dma.c
index 14e94270911c..4fa208d6a032 100644
--- a/sound/soc/fsl/imx-pcm-dma.c
+++ b/sound/soc/fsl/imx-pcm-dma.c
@@ -50,4 +50,5 @@ int imx_pcm_dma_init(struct platform_device *pdev)
}
EXPORT_SYMBOL_GPL(imx_pcm_dma_init);
+MODULE_DESCRIPTION("Freescale i.MX PCM DMA interface");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c
index 02bae207f6ec..b6c5d94a1554 100644
--- a/sound/soc/intel/avs/topology.c
+++ b/sound/soc/intel/avs/topology.c
@@ -1545,8 +1545,8 @@ static int avs_route_load(struct snd_soc_component *comp, int index,
{
struct snd_soc_acpi_mach *mach = dev_get_platdata(comp->card->dev);
size_t len = SNDRV_CTL_ELEM_ID_NAME_MAXLEN;
- char buf[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
int ssp_port, tdm_slot;
+ char *buf;
/* See parse_link_formatted_string() for dynamic naming when(s). */
if (!avs_mach_singular_ssp(mach))
@@ -1557,13 +1557,24 @@ static int avs_route_load(struct snd_soc_component *comp, int index,
return 0;
tdm_slot = avs_mach_ssp_tdm(mach, ssp_port);
+ buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
avs_ssp_sprint(buf, len, route->source, ssp_port, tdm_slot);
- strscpy((char *)route->source, buf, len);
+ route->source = buf;
+
+ buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
avs_ssp_sprint(buf, len, route->sink, ssp_port, tdm_slot);
- strscpy((char *)route->sink, buf, len);
+ route->sink = buf;
+
if (route->control) {
+ buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
avs_ssp_sprint(buf, len, route->control, ssp_port, tdm_slot);
- strscpy((char *)route->control, buf, len);
+ route->control = buf;
}
return 0;
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index b41a1147f1c3..a64d1989e28a 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -613,6 +613,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
{
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ARCHOS 101 CESIUM"),
+ },
+ .driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+ BYT_RT5640_JD_NOT_INV |
+ BYT_RT5640_DIFF_MIC |
+ BYT_RT5640_SSP0_AIF1 |
+ BYT_RT5640_MCLK_EN),
+ },
+ {
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ARCHOS 140 CESIUM"),
},
.driver_data = (void *)(BYT_RT5640_IN1_MAP |
diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c
index 48252fa9e39e..8e0ae3635a35 100644
--- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c
@@ -293,7 +293,7 @@ static const struct snd_soc_acpi_adr_device rt1318_1_single_adr[] = {
.adr = 0x000130025D131801,
.num_endpoints = 1,
.endpoints = &single_endpoint,
- .name_prefix = "rt1318"
+ .name_prefix = "rt1318-1"
}
};
diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
index acaf81fd6c9b..f848e14b091a 100644
--- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
+++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
@@ -31,7 +31,7 @@ struct mt8183_da7219_max98357_priv {
static struct snd_soc_jack_pin mt8183_da7219_max98357_jack_pins[] = {
{
- .pin = "Headphone",
+ .pin = "Headphones",
.mask = SND_JACK_HEADPHONE,
},
{
@@ -626,7 +626,7 @@ static struct snd_soc_codec_conf mt6358_codec_conf[] = {
};
static const struct snd_kcontrol_new mt8183_da7219_max98357_snd_controls[] = {
- SOC_DAPM_PIN_SWITCH("Headphone"),
+ SOC_DAPM_PIN_SWITCH("Headphones"),
SOC_DAPM_PIN_SWITCH("Headset Mic"),
SOC_DAPM_PIN_SWITCH("Speakers"),
SOC_DAPM_PIN_SWITCH("Line Out"),
@@ -634,7 +634,7 @@ static const struct snd_kcontrol_new mt8183_da7219_max98357_snd_controls[] = {
static const
struct snd_soc_dapm_widget mt8183_da7219_max98357_dapm_widgets[] = {
- SND_SOC_DAPM_HP("Headphone", NULL),
+ SND_SOC_DAPM_HP("Headphones", NULL),
SND_SOC_DAPM_MIC("Headset Mic", NULL),
SND_SOC_DAPM_SPK("Speakers", NULL),
SND_SOC_DAPM_SPK("Line Out", NULL),
@@ -680,7 +680,7 @@ static struct snd_soc_codec_conf mt8183_da7219_rt1015_codec_conf[] = {
};
static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = {
- SOC_DAPM_PIN_SWITCH("Headphone"),
+ SOC_DAPM_PIN_SWITCH("Headphones"),
SOC_DAPM_PIN_SWITCH("Headset Mic"),
SOC_DAPM_PIN_SWITCH("Left Spk"),
SOC_DAPM_PIN_SWITCH("Right Spk"),
@@ -689,7 +689,7 @@ static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = {
static const
struct snd_soc_dapm_widget mt8183_da7219_rt1015_dapm_widgets[] = {
- SND_SOC_DAPM_HP("Headphone", NULL),
+ SND_SOC_DAPM_HP("Headphones", NULL),
SND_SOC_DAPM_MIC("Headset Mic", NULL),
SND_SOC_DAPM_SPK("Left Spk", NULL),
SND_SOC_DAPM_SPK("Right Spk", NULL),
diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c
index ca8751190520..2832ef78eaed 100644
--- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c
+++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c
@@ -827,6 +827,7 @@ SND_SOC_DAILINK_DEFS(ETDM2_IN_BE,
SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE,
DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")),
+ DAILINK_COMP_ARRAY(COMP_EMPTY()),
DAILINK_COMP_ARRAY(COMP_EMPTY()));
SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE,
diff --git a/sound/soc/mxs/mxs-pcm.c b/sound/soc/mxs/mxs-pcm.c
index df2e4be992d2..9bb08cadeb18 100644
--- a/sound/soc/mxs/mxs-pcm.c
+++ b/sound/soc/mxs/mxs-pcm.c
@@ -43,4 +43,5 @@ int mxs_pcm_platform_register(struct device *dev)
}
EXPORT_SYMBOL_GPL(mxs_pcm_platform_register);
+MODULE_DESCRIPTION("MXS ASoC PCM driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
index 68a38f63a2db..66b911b49e3f 100644
--- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
+++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
@@ -141,14 +141,17 @@ static void q6apm_lpass_dai_shutdown(struct snd_pcm_substream *substream, struct
struct q6apm_lpass_dai_data *dai_data = dev_get_drvdata(dai->dev);
int rc;
- if (!dai_data->is_port_started[dai->id])
- return;
- rc = q6apm_graph_stop(dai_data->graph[dai->id]);
- if (rc < 0)
- dev_err(dai->dev, "fail to close APM port (%d)\n", rc);
+ if (dai_data->is_port_started[dai->id]) {
+ rc = q6apm_graph_stop(dai_data->graph[dai->id]);
+ dai_data->is_port_started[dai->id] = false;
+ if (rc < 0)
+ dev_err(dai->dev, "fail to close APM port (%d)\n", rc);
+ }
- q6apm_graph_close(dai_data->graph[dai->id]);
- dai_data->is_port_started[dai->id] = false;
+ if (dai_data->graph[dai->id]) {
+ q6apm_graph_close(dai_data->graph[dai->id]);
+ dai_data->graph[dai->id] = NULL;
+ }
}
static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
@@ -163,8 +166,10 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s
q6apm_graph_stop(dai_data->graph[dai->id]);
dai_data->is_port_started[dai->id] = false;
- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
q6apm_graph_close(dai_data->graph[dai->id]);
+ dai_data->graph[dai->id] = NULL;
+ }
}
/**
@@ -183,26 +188,29 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s
cfg->direction = substream->stream;
rc = q6apm_graph_media_format_pcm(dai_data->graph[dai->id], cfg);
-
if (rc) {
dev_err(dai->dev, "Failed to set media format %d\n", rc);
- return rc;
+ goto err;
}
rc = q6apm_graph_prepare(dai_data->graph[dai->id]);
if (rc) {
dev_err(dai->dev, "Failed to prepare Graph %d\n", rc);
- return rc;
+ goto err;
}
rc = q6apm_graph_start(dai_data->graph[dai->id]);
if (rc < 0) {
dev_err(dai->dev, "fail to start APM port %x\n", dai->id);
- return rc;
+ goto err;
}
dai_data->is_port_started[dai->id] = true;
return 0;
+err:
+ q6apm_graph_close(dai_data->graph[dai->id]);
+ dai_data->graph[dai->id] = NULL;
+ return rc;
}
static int q6apm_lpass_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c
index eaa8bb016e50..f2eda2ff46c0 100644
--- a/sound/soc/qcom/sdw.c
+++ b/sound/soc/qcom/sdw.c
@@ -160,4 +160,5 @@ int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream,
return 0;
}
EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free);
+MODULE_DESCRIPTION("Qualcomm ASoC SoundWire helper functions");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
index 9fa020ef7eab..ee517d7b5b7b 100644
--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
+++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
@@ -655,8 +655,17 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream,
int err;
if (i2s_tdm->is_master_mode) {
- struct clk *mclk = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
- i2s_tdm->mclk_tx : i2s_tdm->mclk_rx;
+ struct clk *mclk;
+
+ if (i2s_tdm->clk_trcm == TRCM_TX) {
+ mclk = i2s_tdm->mclk_tx;
+ } else if (i2s_tdm->clk_trcm == TRCM_RX) {
+ mclk = i2s_tdm->mclk_rx;
+ } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ mclk = i2s_tdm->mclk_tx;
+ } else {
+ mclk = i2s_tdm->mclk_rx;
+ }
err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params));
if (err)
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index ea3bc9318412..a63e942fdc0b 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -318,6 +318,12 @@ static int dmaengine_copy(struct snd_soc_component *component,
return 0;
}
+static int dmaengine_pcm_sync_stop(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ return snd_dmaengine_pcm_sync_stop(substream);
+}
+
static const struct snd_soc_component_driver dmaengine_pcm_component = {
.name = SND_DMAENGINE_PCM_DRV_NAME,
.probe_order = SND_SOC_COMP_ORDER_LATE,
@@ -327,6 +333,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component = {
.trigger = dmaengine_pcm_trigger,
.pointer = dmaengine_pcm_pointer,
.pcm_construct = dmaengine_pcm_new,
+ .sync_stop = dmaengine_pcm_sync_stop,
};
static const struct snd_soc_component_driver dmaengine_pcm_component_process = {
@@ -339,6 +346,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component_process = {
.pointer = dmaengine_pcm_pointer,
.copy = dmaengine_copy,
.pcm_construct = dmaengine_pcm_new,
+ .sync_stop = dmaengine_pcm_sync_stop,
};
static const char * const dmaengine_pcm_dma_channel_names[] = {
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 90ca37e008b3..6951ff7bc61e 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1021,6 +1021,7 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
struct snd_soc_tplg_hdr *hdr)
{
struct snd_soc_dapm_context *dapm = &tplg->comp->dapm;
+ const size_t maxlen = SNDRV_CTL_ELEM_ID_NAME_MAXLEN;
struct snd_soc_tplg_dapm_graph_elem *elem;
struct snd_soc_dapm_route *route;
int count, i;
@@ -1044,31 +1045,27 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
tplg->pos += sizeof(struct snd_soc_tplg_dapm_graph_elem);
/* validate routes */
- if (strnlen(elem->source, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
- SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
+ if ((strnlen(elem->source, maxlen) == maxlen) ||
+ (strnlen(elem->sink, maxlen) == maxlen) ||
+ (strnlen(elem->control, maxlen) == maxlen)) {
ret = -EINVAL;
break;
}
- if (strnlen(elem->sink, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
- SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
- ret = -EINVAL;
- break;
- }
- if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
- SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
- ret = -EINVAL;
+
+ route->source = devm_kstrdup(tplg->dev, elem->source, GFP_KERNEL);
+ route->sink = devm_kstrdup(tplg->dev, elem->sink, GFP_KERNEL);
+ if (!route->source || !route->sink) {
+ ret = -ENOMEM;
break;
}
- route->source = elem->source;
- route->sink = elem->sink;
-
- /* set to NULL atm for tplg users */
- route->connected = NULL;
- if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == 0)
- route->control = NULL;
- else
- route->control = elem->control;
+ if (strnlen(elem->control, maxlen) != 0) {
+ route->control = devm_kstrdup(tplg->dev, elem->control, GFP_KERNEL);
+ if (!route->control) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
/* add route dobj to dobj_list */
route->dobj.type = SND_SOC_DOBJ_GRAPH;
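The rewritten loop fixes a lifetime problem along with the duplicated length checks: route->source/sink/control previously pointed into the loaded topology data (elem->source and friends), while devm_kstrdup() now gives each string the lifetime of the component device. A hedged sketch of the validate-then-duplicate step in isolation (a generic helper, not the topology structs themselves):

	/* Reject strings with no terminator in bounds, then take an owned
	 * copy tied to the device rather than to the topology blob. */
	static const char *dup_fw_string(struct device *dev, const char *src,
					 size_t maxlen)
	{
		if (strnlen(src, maxlen) == maxlen)
			return ERR_PTR(-EINVAL);
		return devm_kstrdup(dev, src, GFP_KERNEL) ?: ERR_PTR(-ENOMEM);
	}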
diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c
index ce675c22a5ab..1c823f9eea57 100644
--- a/sound/soc/sof/intel/hda-dai.c
+++ b/sound/soc/sof/intel/hda-dai.c
@@ -379,7 +379,7 @@ static int non_hda_dai_hw_params_data(struct snd_pcm_substream *substream,
sdev = widget_to_sdev(w);
if (sdev->dspless_mode_selected)
- goto skip_tlv;
+ return 0;
/* get stream_id */
hext_stream = ops->get_hext_stream(sdev, cpu_dai, substream);
@@ -423,7 +423,6 @@ static int non_hda_dai_hw_params_data(struct snd_pcm_substream *substream,
dma_config->dma_stream_channel_map.device_count = 1;
dma_config->dma_priv_config_size = 0;
-skip_tlv:
return 0;
}
@@ -525,6 +524,9 @@ int sdw_hda_dai_hw_params(struct snd_pcm_substream *substream,
return ret;
}
+ if (sdev->dspless_mode_selected)
+ return 0;
+
ipc4_copier = widget_to_copier(w);
dma_config_tlv = &ipc4_copier->dma_config_tlv[cpu_dai_id];
dma_config = &dma_config_tlv->dma_config;
@@ -615,12 +617,6 @@ static int hda_dai_suspend(struct hdac_bus *bus)
sdai = swidget->private;
ops = sdai->platform_private;
- ret = hda_link_dma_cleanup(hext_stream->link_substream,
- hext_stream,
- cpu_dai);
- if (ret < 0)
- return ret;
-
/* for consistency with TRIGGER_SUSPEND */
if (ops->post_trigger) {
ret = ops->post_trigger(sdev, cpu_dai,
@@ -629,6 +625,12 @@ static int hda_dai_suspend(struct hdac_bus *bus)
if (ret < 0)
return ret;
}
+
+ ret = hda_link_dma_cleanup(hext_stream->link_substream,
+ hext_stream,
+ cpu_dai);
+ if (ret < 0)
+ return ret;
}
}
diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c
index 9fb8521b896b..f6e24edd7adb 100644
--- a/sound/soc/sof/intel/hda-pcm.c
+++ b/sound/soc/sof/intel/hda-pcm.c
@@ -258,6 +258,12 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev,
snd_pcm_hw_constraint_integer(substream->runtime,
SNDRV_PCM_HW_PARAM_PERIODS);
+ /* Limit the maximum number of periods to not exceed the BDL entries count */
+ if (runtime->hw.periods_max > HDA_DSP_MAX_BDL_ENTRIES)
+ snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIODS,
+ runtime->hw.periods_min,
+ HDA_DSP_MAX_BDL_ENTRIES);
+
/* Only S16 and S32 supported by HDA hardware when used without DSP */
if (sdev->dspless_mode_selected)
snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT,
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index b3ac040811e7..ef9318947d74 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -485,7 +485,7 @@ sink_prepare:
if (ret < 0) {
/* unprepare the source widget */
if (widget_ops[widget->id].ipc_unprepare &&
- swidget && swidget->prepared) {
+ swidget && swidget->prepared && swidget->use_count == 0) {
widget_ops[widget->id].ipc_unprepare(swidget);
swidget->prepared = false;
}
diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c
index 1e760c315521..2b1ed91a736c 100644
--- a/sound/soc/ti/davinci-mcasp.c
+++ b/sound/soc/ti/davinci-mcasp.c
@@ -1472,10 +1472,11 @@ static int davinci_mcasp_hw_rule_min_periodsize(
{
struct snd_interval *period_size = hw_param_interval(params,
SNDRV_PCM_HW_PARAM_PERIOD_SIZE);
+ u8 numevt = *((u8 *)rule->private);
struct snd_interval frames;
snd_interval_any(&frames);
- frames.min = 64;
+ frames.min = numevt;
frames.integer = 1;
return snd_interval_refine(period_size, &frames);
@@ -1490,6 +1491,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream,
u32 max_channels = 0;
int i, dir, ret;
int tdm_slots = mcasp->tdm_slots;
+ u8 *numevt;
/* Do not allow more than one stream per direction */
if (mcasp->substreams[substream->stream])
@@ -1589,9 +1591,12 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream,
return ret;
}
+ numevt = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
+ &mcasp->txnumevt :
+ &mcasp->rxnumevt;
snd_pcm_hw_rule_add(substream->runtime, 0,
SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
- davinci_mcasp_hw_rule_min_periodsize, NULL,
+ davinci_mcasp_hw_rule_min_periodsize, numevt,
SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1);
return 0;
diff --git a/sound/soc/ti/omap-hdmi.c b/sound/soc/ti/omap-hdmi.c
index 639bc83f4263..cf43ac19c4a6 100644
--- a/sound/soc/ti/omap-hdmi.c
+++ b/sound/soc/ti/omap-hdmi.c
@@ -354,11 +354,7 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev)
if (!card)
return -ENOMEM;
- card->name = devm_kasprintf(dev, GFP_KERNEL,
- "HDMI %s", dev_name(ad->dssdev));
- if (!card->name)
- return -ENOMEM;
-
+ card->name = "HDMI";
card->owner = THIS_MODULE;
card->dai_link =
devm_kzalloc(dev, sizeof(*(card->dai_link)), GFP_KERNEL);
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index bb52871da341..2e60e2c212cd 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -17,6 +17,7 @@ endif
MAKEFLAGS += -r
override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+override CFLAGS += -Wno-address-of-packed-member
ALL_TARGETS := hv_kvp_daemon hv_vss_daemon
ifneq ($(ARCH), aarch64)
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 233f2b6edf52..49b79cf0c5cc 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str)
return result;
}
-static int comm_str__cmp(const void *_lhs, const void *_rhs)
-{
- const struct comm_str *lhs = *(const struct comm_str * const *)_lhs;
- const struct comm_str *rhs = *(const struct comm_str * const *)_rhs;
-
- return strcmp(comm_str__str(lhs), comm_str__str(rhs));
-}
-
static int comm_str__search(const void *_key, const void *_member)
{
const char *key = _key;
@@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str)
}
result = comm_str__new(str);
if (result) {
- comm_strs->strs[comm_strs->num_strs++] = result;
- qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *),
- comm_str__cmp);
+ int low = 0, high = comm_strs->num_strs - 1;
+ int insert = comm_strs->num_strs; /* Default to inserting at the end. */
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str);
+
+ if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ insert = mid;
+ }
+ }
+ memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert],
+ (comm_strs->num_strs - insert) * sizeof(struct comm_str *));
+ comm_strs->num_strs++;
+ comm_strs->strs[insert] = result;
}
}
up_write(&comm_strs->lock);
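Dropping comm_str__cmp() works because the array is now kept sorted incrementally: a lower-bound binary search finds the insertion slot and a memmove opens it, replacing a full qsort() after every append. The dsos change below applies the same pattern. A self-contained sketch of the idea (plain C, outside the perf code):

	#include <string.h>

	/* Insert s into the sorted array arr[0..n-1] (capacity >= n + 1),
	 * keeping it sorted; returns the new element count. */
	static int sorted_insert(const char **arr, int n, const char *s)
	{
		int low = 0, high = n - 1, insert = n;

		while (low <= high) {
			int mid = low + (high - low) / 2;

			if (strcmp(arr[mid], s) < 0) {
				low = mid + 1;
			} else {
				high = mid - 1;
				insert = mid;
			}
		}
		memmove(&arr[insert + 1], &arr[insert],
			(n - insert) * sizeof(*arr));
		arr[insert] = s;
		return n + 1;
	}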
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index ab3d0c01dd63..a69a9c661200 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso)
dsos->dsos = temp;
dsos->allocated = to_allocate;
}
- dsos->dsos[dsos->cnt++] = dso__get(dso);
- if (dsos->cnt >= 2 && dsos->sorted) {
- dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2],
- &dsos->dsos[dsos->cnt - 1])
- <= 0;
+ if (!dsos->sorted) {
+ dsos->dsos[dsos->cnt++] = dso__get(dso);
+ } else {
+ int low = 0, high = dsos->cnt - 1;
+ int insert = dsos->cnt; /* Default to inserting at the end. */
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso);
+
+ if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ insert = mid;
+ }
+ }
+ memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert],
+ (dsos->cnt - insert) * sizeof(struct dso *));
+ dsos->cnt++;
+ dsos->dsos[insert] = dso__get(dso);
}
dso__set_dsos(dso, dsos);
return 0;
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 2d6dce2c8f77..b1e6817f1e54 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -14,6 +14,7 @@ turbostat : turbostat.c
override CFLAGS += -O2 -Wall -Wextra -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+override CFLAGS += -DBUILD_BUG_HEADER='"../../../../include/linux/build_bug.h"'
override CFLAGS += -D_FILE_OFFSET_BITS=64
override CFLAGS += -D_FORTIFY_SOURCE=2
@@ -44,10 +45,13 @@ snapshot: turbostat
@echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
@echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
+ @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h
+
@echo PWD=. > $(SNAPSHOT)/Makefile
@echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile
@echo "CFLAGS += -DINTEL_FAMILY_HEADER='\"intel-family.h\"'" >> $(SNAPSHOT)/Makefile
- @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile
+ @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile
+ @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' -e's/.*BUILD_BUG_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile
@rm -f $(SNAPSHOT).tar.gz
tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT)
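The snapshot tarball cannot ship <linux/build_bug.h>, so the Makefile now emits a one-line stand-in: when cond is true, 1/(!(cond)) divides by zero inside an enum initializer, which must be a constant expression, and compilation fails. A small usage sketch (simplified, without the __COUNTER__ suffix the snapshot version pastes onto the enum name):

	#define BUILD_BUG_ON(cond) \
		do { enum { compile_time_check = 1/(!(cond)) }; } while (0)

	int main(void)
	{
		BUILD_BUG_ON(sizeof(long long) < 8);	/* false: 1/1, compiles */
		/* A true condition such as BUILD_BUG_ON(1) would make the
		 * enum initializer divide by zero and break the build. */
		return 0;
	}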
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8cdf41906e98..9f5d053d4bc6 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -10,6 +10,7 @@
#define _GNU_SOURCE
#include MSRHEADER
#include INTEL_FAMILY_HEADER
+#include BUILD_BUG_HEADER
#include <stdarg.h>
#include <stdio.h>
#include <err.h>
@@ -38,7 +39,6 @@
#include <stdbool.h>
#include <assert.h>
#include <linux/kernel.h>
-#include <linux/build_bug.h>
#define UNUSED(x) (void)(x)
@@ -5695,9 +5695,6 @@ static void probe_intel_uncore_frequency_cluster(void)
if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
return;
- if (quiet)
- return;
-
for (uncore_max_id = 0;; ++uncore_max_id) {
sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id);
@@ -5727,6 +5724,14 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/fabric_cluster_id", path_base);
cluster_id = read_sysfs_int(path);
+ sprintf(path, "%s/current_freq_khz", path_base);
+ sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
+
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
+
+ if (quiet)
+ continue;
+
sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
sprintf(path, "%s/max_freq_khz", path_base);
@@ -5743,11 +5748,6 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/current_freq_khz", path_base);
k = read_sysfs_int(path);
fprintf(outf, " %d MHz\n", k / 1000);
-
- sprintf(path, "%s/current_freq_khz", path_base);
- sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
-
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
}
}
@@ -8424,7 +8424,7 @@ void cmdline(int argc, char **argv)
* Parse some options early, because they may make other options invalid,
* like adding the MSR counter with --add and at the same time using --no-msr.
*/
- while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) {
switch (opt) {
case 'M':
no_msr = 1;
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 3482248aa344..90d5afd52dd0 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
+ struct device *dev = &port->dev;
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
cxlhdm->port = port;
+ cxlhdm->interleave_mask = ~0U;
+ cxlhdm->iw_cap_mask = ~0UL;
+ dev_set_drvdata(dev, cxlhdm);
return cxlhdm;
}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e0b3887b3d2d..dd49c1d23a60 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index eeabd798bc3a..98b6b6a886ce 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -58,9 +58,12 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
+CONFIG_NET_ACT_SKBMOD=y
+CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_BPF=y
CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=y
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 4c6f42dae409..da430df45aa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,9 +12,11 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
+
#include "test_ringbuf.lskel.h"
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
+#include "test_ringbuf_write.lskel.h"
#define EDONE 7777
@@ -84,6 +86,58 @@ static void *poll_thread(void *input)
return (void *)(long)ring_buffer__poll(ringbuf, timeout);
}
+static void ringbuf_write_subtest(void)
+{
+ struct test_ringbuf_write_lskel *skel;
+ int page_size = getpagesize();
+ size_t *mmap_ptr;
+ int err, rb_fd;
+
+ skel = test_ringbuf_write_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = 0x4000;
+
+ err = test_ringbuf_write_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
+ goto cleanup;
+ *mmap_ptr = 0x3000;
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new"))
+ goto cleanup;
+
+ err = test_ringbuf_write_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup_ringbuf;
+
+ skel->bss->discarded = 0;
+ skel->bss->passed = 0;
+
+ /* trigger exactly two samples */
+ syscall(__NR_getpgid);
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->discarded, 2, "discarded");
+ ASSERT_EQ(skel->bss->passed, 0, "passed");
+
+ test_ringbuf_write_lskel__detach(skel);
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_write_lskel__destroy(skel);
+}
+
static void ringbuf_subtest(void)
{
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
@@ -451,4 +505,6 @@ void test_ringbuf(void)
ringbuf_n_subtest();
if (test__start_subtest("ringbuf_map_key"))
ringbuf_map_key_subtest();
+ if (test__start_subtest("ringbuf_write"))
+ ringbuf_write_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index bc9841144685..1af9ec1149aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -9,6 +9,8 @@
#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
#include "test_tc_link.skel.h"
+
+#include "netlink_helpers.h"
#include "tc_helpers.h"
void serial_test_tc_links_basic(void)
@@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void)
test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
}
+struct qdisc_req {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[1024];
+};
+
+static int qdisc_replace(int ifindex, const char *kind, bool block)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct qdisc_req req;
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_NEWQDISC;
+ req.t.tcm_family = AF_UNSPEC;
+ req.t.tcm_ifindex = ifindex;
+ req.t.tcm_parent = 0xfffffff1;
+
+ addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1);
+ if (block)
+ addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+void serial_test_tc_links_dev_chain0(void)
+{
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth");
+ ifindex = if_nametoindex("foo");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+ err = qdisc_replace(ifindex, "ingress", true);
+ if (!ASSERT_OK(err, "attaching ingress"))
+ goto cleanup;
+ ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block");
+ err = qdisc_replace(ifindex, "clsact", false);
+ if (!ASSERT_OK(err, "attaching clsact"))
+ goto cleanup;
+	/* Heuristic: kern_sync_rcu() alone does not work; a ~5s wait
+	 * reliably (100% of the time) triggered the issue on kernels
+	 * without the fix.
+	 */
+ sleep(5);
+ ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter");
+cleanup:
+ ASSERT_OK(system("ip link del dev foo"), "del veth");
+ ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed");
+ ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed");
+}
+
static void test_tc_links_dev_mixed(int target)
{
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
new file mode 100644
index 000000000000..871d16cb95cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1000,
+ );
+ int i, prog_fd = *(int *)arg;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+ &cpuset),
+ "cpu affinity");
+
+ for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ /* Skip the test if we can't reproduce the race in a reasonable
+ * amount of time.
+ */
+ if (i > 50) {
+ WRITE_ONCE(skip, true);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_lockup(void)
+{
+ int timer1_prog, timer2_prog;
+ struct timer_lockup *skel;
+ pthread_t thrds[2];
+ void *ret;
+
+ skel = timer_lockup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+ return;
+
+ timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+ timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+ timer1_err = &skel->bss->timer1_err;
+ timer2_err = &skel->bss->timer2_err;
+
+ if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+ &timer1_prog),
+ "pthread_create thread1"))
+ goto out;
+ if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+ &timer2_prog),
+ "pthread_create thread2")) {
+ pthread_exit(&thrds[0]);
+ goto out;
+ }
+
+ pthread_join(thrds[1], &ret);
+ pthread_join(thrds[0], &ret);
+
+ if (skip) {
+ test__skip();
+ goto out;
+ }
+
+ if (*timer1_err != -EDEADLK && *timer1_err != 0)
+ ASSERT_FAIL("timer1_err bad value");
+ if (*timer2_err != -EDEADLK && *timer2_err != 0)
+ ASSERT_FAIL("timer2_err bad value");
+out:
+ timer_lockup__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 1c9c4ec1be11..98ef39efa77e 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -53,6 +53,7 @@
#include "verifier_movsx.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
+#include "verifier_or_jmp32_k.skel.h"
#include "verifier_precision.skel.h"
#include "verifier_prevent_map_lookup.skel.h"
#include "verifier_raw_stack.skel.h"
@@ -170,6 +171,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
void test_verifier_precision(void) { RUN(verifier_precision); }
void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
new file mode 100644
index 000000000000..350513c0e4c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* outputs */
+long passed = 0;
+long discarded = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_write(void *ctx)
+{
+ int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32;
+ void *sample1, *sample2;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample1)
+ return 0;
+ /* first one can pass */
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample2) {
+ bpf_ringbuf_discard(sample1, 0);
+ __sync_fetch_and_add(&discarded, 1);
+ return 0;
+ }
+ /* second one must not */
+ __sync_fetch_and_add(&passed, 1);
+ foo = sample2 + 4084;
+ *foo = 256;
+ bpf_ringbuf_discard(sample1, 0);
+ bpf_ringbuf_discard(sample2, 0);
+ return 0;
+}
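The sizes in this program are what make the test bite: the map is 0x4000 bytes, so two 0x3000-byte samples can never coexist no matter what user space writes into the writable consumer page. Below is a simplified, self-contained model of the reservation bound; it is an assumption-laden sketch that only loosely mirrors the real logic in kernel/bpf/ringbuf.c (8-byte header, pending-reservation guard):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define RB_MASK	(0x4000ULL - 1)	/* max_entries used by the test, minus 1 */
	#define HDR_SZ	8ULL

	/* A reservation must stay within one buffer size of both the
	 * (untrusted, user-writable) consumer position and the oldest
	 * uncommitted reservation. */
	static bool can_reserve(uint64_t prod, uint64_t cons, uint64_t pend,
				uint64_t len)
	{
		uint64_t new_prod = prod + HDR_SZ + len;

		return new_prod - cons <= RB_MASK && new_prod - pend <= RB_MASK;
	}

	int main(void)
	{
		/* First 0x3000-byte sample, consumer_pos already forged
		 * to 0x3000: fits. */
		printf("%d\n", can_reserve(0, 0x3000, 0, 0x3000));	/* 1 */
		/* Second sample: the consumer check alone would pass
		 * thanks to the forged position, but the pending check
		 * rejects it, matching discarded == 2 in the test. */
		printf("%d\n", can_reserve(0x3008, 0x3000, 0, 0x3000));	/* 0 */
		return 0;
	}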
diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c
new file mode 100644
index 000000000000..3e520133281e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_lockup.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer)
+ timer2_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer)
+ timer1_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb1);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb2);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index bd676d7e615f..80c737b6d340 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void)
);
}
+int tmp_var;
+SEC("socket")
+__failure __msg("infinite loop detected at insn 2")
+__naked void jgt_imm64_and_may_goto(void)
+{
+ asm volatile (" \
+ r0 = %[tmp_var] ll; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -3; /* off -3 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm_addr(tmp_var)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("infinite loop detected at insn 1")
+__naked void may_goto_self(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -1; /* off -1 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void may_goto_neg_off(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+ goto l0_%=; \
+ goto l1_%=; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -2; /* off -2 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
SEC("tc")
__failure
__flag(BPF_F_TEST_STATE_FREQ)
@@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb)
return 0;
}
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto2(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void jlt_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: call %[bpf_jiffies64]; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ if r0 < 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm(bpf_jiffies64)
+ : __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+SEC("socket")
+__success __retval(0)
+__naked void gotol_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ gotol l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+#endif
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto_subprog(void)
+{
+ asm volatile (" \
+ call subprog_with_may_goto; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void subprog_with_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
#define ARR_SZ 1000000
int zero;
char arr[ARR_SZ];
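The repeated `.byte 0xe5; .byte 0; .short N; .long 0` sequences above hand-assemble a may_goto instruction. Expressed as a struct bpf_insn initializer (assuming a uapi linux/bpf.h recent enough to define BPF_JCOND and BPF_MAY_GOTO), the encoding is:

	#include <linux/bpf.h>

	/* may_goto: opcode 0xe5 == BPF_JMP (0x05) | BPF_JCOND (0xe0),
	 * both register nibbles zero (src_reg == BPF_MAY_GOTO),
	 * off == branch target in instructions, imm unused. */
	static struct bpf_insn may_goto_insn(short off)
	{
		return (struct bpf_insn) {
			.code    = BPF_JMP | BPF_JCOND,
			.dst_reg = 0,
			.src_reg = BPF_MAY_GOTO,
			.off     = off,
			.imm     = 0,
		};
	}

The off field behaves exactly like a jump offset, which is why the tests above can point it backwards (-1, -2, -3) to build the loops the verifier must detect.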
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index cbb9d6714f53..028ec855587b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -224,6 +224,69 @@ l0_%=: \
: __clobber_all);
}
+SEC("socket")
+__description("MOV32SX, S8, var_off u32_max")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0")
+__naked void mov64sx_s32_varoff_1(void)
+{
+ asm volatile (" \
+l0_%=: \
+ r3 = *(u8 *)(r10 -387); \
+ w7 = (s8)w3; \
+ if w7 >= 0x2533823b goto l0_%=; \
+ w0 = 0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ w7 = (s8)w3; \
+ if w7 s>= 16 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ r3 |= 0x80; \
+ w7 = (s8)w3; \
+ if w7 s>= -5 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
new file mode 100644
index 000000000000..f37713a265ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("or_jmp32_k: bit ops + branch on unknown value")
+__failure
+__msg("R0 invalid mem access 'scalar'")
+__naked void or_jmp32_k(void)
+{
+ asm volatile (" \
+ r0 = 0xffffffff; \
+ r0 /= 1; \
+ r1 = 0; \
+ w1 = -1; \
+ w1 >>= 1; \
+ w0 &= w1; \
+ w0 |= 2; \
+ if w0 != 0x7ffffffd goto l1; \
+ r0 = 1; \
+ exit; \
+l3: \
+ r0 = 5; \
+ *(u64*)(r0 - 8) = r0; \
+ exit; \
+l2: \
+ w0 -= 0xe; \
+ if w0 == 1 goto l3; \
+ r0 = 4; \
+ exit; \
+l1: \
+ w0 -= 0x7ffffff0; \
+ if w0 s>= 0xe goto l2; \
+ r0 = 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config
index f35de0542b60..bcf7555eaffe 100644
--- a/tools/testing/selftests/drivers/net/virtio_net/config
+++ b/tools/testing/selftests/drivers/net/virtio_net/config
@@ -1,2 +1,8 @@
-CONFIG_VIRTIO_NET=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=m
CONFIG_VIRTIO_DEBUG=y
+CONFIG_VIRTIO_NET=y
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
index 71ec34bf1501..4373cea79b79 100644
--- a/tools/testing/selftests/fchmodat2/Makefile
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -1,6 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
+
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
TEST_GEN_PROGS := fchmodat2_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 07a0d5b545ca..3af3136e35a4 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := statmount_test
+TEST_GEN_PROGS := statmount_test statmount_test_ns
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
new file mode 100644
index 000000000000..f4294bab9d73
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __STATMOUNT_H
+#define __STATMOUNT_H
+
+#include <stdint.h>
+#include <linux/mount.h>
+#include <asm/unistd.h>
+
+static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
+ struct statmount *buf, size_t bufsize,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = mask,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+ uint64_t last_mnt_id, uint64_t list[], size_t num,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = last_mnt_id,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+#endif /* __STATMOUNT_H */
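A short example of how these wrappers are meant to be called (illustrative; the real tests add fuller error plumbing): list the current namespace's mounts, then stat each one.

	#define _GNU_SOURCE
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	#include "statmount.h"

	int main(void)
	{
		uint64_t list[64];
		ssize_t i, n;

		/* mnt_ns_id == 0 means "the caller's own mount namespace". */
		n = listmount(LSMT_ROOT, 0, 0, list, 64, 0);
		if (n < 0) {
			fprintf(stderr, "listmount: %s\n", strerror(errno));
			return 1;
		}

		for (i = 0; i < n; i++) {
			struct statmount sm;

			if (statmount(list[i], 0, STATMOUNT_MNT_BASIC,
				      &sm, sizeof(sm), 0) == 0)
				printf("mnt_id %llu parent %llu\n",
				       (unsigned long long)sm.mnt_id,
				       (unsigned long long)sm.mnt_parent_id);
		}
		return 0;
	}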
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index e6d7c4f1c85b..c773334bbcc9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -4,17 +4,15 @@
#include <assert.h>
#include <stddef.h>
-#include <stdint.h>
#include <sched.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/statfs.h>
-#include <linux/mount.h>
#include <linux/stat.h>
-#include <asm/unistd.h>
+#include "statmount.h"
#include "../../kselftest.h"
static const char *const known_fs[] = {
@@ -36,18 +34,6 @@ static const char *const known_fs[] = {
"ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
"zonefs", NULL };
-static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf,
- size_t bufsize, unsigned int flags)
-{
- struct mnt_id_req req = {
- .size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
- .param = mask,
- };
-
- return syscall(__NR_statmount, &req, buf, bufsize, flags);
-}
-
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{
size_t bufsize = 1 << 15;
@@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne
int ret;
for (;;) {
- ret = statmount(mnt_id, mask, tmp, bufsize, flags);
+ ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
if (ret != -1)
break;
if (tofree)
@@ -121,12 +107,20 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX";
static int orig_root;
static uint64_t root_id, parent_id;
static uint32_t old_root_id, old_parent_id;
-
+static FILE *f_mountinfo;
static void cleanup_namespace(void)
{
- fchdir(orig_root);
- chroot(".");
+ int ret;
+
+ ret = fchdir(orig_root);
+ if (ret == -1)
+ ksft_perror("fchdir to original root");
+
+ ret = chroot(".");
+ if (ret == -1)
+ ksft_perror("chroot to original root");
+
umount2(root_mntpoint, MNT_DETACH);
rmdir(root_mntpoint);
}
@@ -138,7 +132,7 @@ static void setup_namespace(void)
uid_t uid = getuid();
gid_t gid = getgid();
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
if (ret == -1)
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno));
@@ -149,6 +143,11 @@ static void setup_namespace(void)
sprintf(buf, "0 %d 1", gid);
write_file("/proc/self/gid_map", buf);
+ f_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!f_mountinfo)
+ ksft_exit_fail_msg("failed to open mountinfo: %s\n",
+ strerror(errno));
+
ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret == -1)
ksft_exit_fail_msg("making mount tree private: %s\n",
@@ -208,25 +207,13 @@ static int setup_mount_tree(int log2_num)
return 0;
}
-static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id,
- uint64_t list[], size_t num, unsigned int flags)
-{
- struct mnt_id_req req = {
- .size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
- .param = last_mnt_id,
- };
-
- return syscall(__NR_listmount, &req, list, num, flags);
-}
-
static void test_listmount_empty_root(void)
{
ssize_t res;
const unsigned int size = 32;
uint64_t list[size];
- res = listmount(LSMT_ROOT, 0, list, size, 0);
+ res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno));
return;
@@ -251,7 +238,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm;
int ret;
- ret = statmount(root_id, 0, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno));
@@ -277,7 +264,7 @@ static void test_statmount_mnt_basic(void)
int ret;
uint64_t mask = STATMOUNT_MNT_BASIC;
- ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno));
@@ -337,7 +324,7 @@ static void test_statmount_sb_basic(void)
struct statx sx;
struct statfs sf;
- ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno));
@@ -462,6 +449,88 @@ static void test_statmount_fs_type(void)
free(sm);
}
+static void test_statmount_mnt_opts(void)
+{
+ struct statmount *sm;
+ const char *statmount_opts;
+ char *line = NULL;
+ size_t len = 0;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
+ 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mnt opts: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ int i;
+ char *p, *p2;
+ unsigned int old_mnt_id;
+
+ old_mnt_id = atoi(line);
+ if (old_mnt_id != sm->mnt_id_old)
+ continue;
+
+ for (p = line, i = 0; p && i < 5; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+
+ p2 = strchr(p + 1, ' ');
+ if (!p2)
+ continue;
+ *p2 = '\0';
+ p = strchr(p2 + 1, '-');
+ if (!p)
+ continue;
+ for (p++, i = 0; p && i < 2; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+ p++;
+
+ /* skip generic superblock options */
+ if (strncmp(p, "ro", 2) == 0)
+ p += 2;
+ else if (strncmp(p, "rw", 2) == 0)
+ p += 2;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "sync", 4) == 0)
+ p += 4;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "dirsync", 7) == 0)
+ p += 7;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "lazytime", 8) == 0)
+ p += 8;
+ if (*p == ',')
+ p++;
+ p2 = strrchr(p, '\n');
+ if (p2)
+ *p2 = '\0';
+
+ statmount_opts = sm->str + sm->mnt_opts;
+ if (strcmp(statmount_opts, p) != 0)
+ ksft_test_result_fail(
+ "unexpected mount options: '%s' != '%s'\n",
+ statmount_opts, p);
+ else
+ ksft_test_result_pass("statmount mount options\n");
+ free(sm);
+ free(line);
+ return;
+ }
+
+ ksft_test_result_fail("didnt't find mount entry\n");
+ free(sm);
+ free(line);
+}
+
static void test_statmount_string(uint64_t mask, size_t off, const char *name)
{
struct statmount *sm;
@@ -498,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size;
shortsize = sizeof(*sm) + i;
- ret = statmount(root_id, mask, sm, exactsize, 0);
+ ret = statmount(root_id, 0, mask, sm, exactsize, 0);
if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno));
goto out;
}
errno = 0;
- ret = statmount(root_id, mask, sm, shortsize, 0);
+ ret = statmount(root_id, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno));
@@ -533,7 +602,7 @@ static void test_listmount_tree(void)
if (res == -1)
return;
- num = res = listmount(LSMT_ROOT, 0, list, size, 0);
+ num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno));
return;
@@ -545,7 +614,7 @@ static void test_listmount_tree(void)
}
for (i = 0; i < size - step;) {
- res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0);
+ res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0);
if (res == -1)
ksft_test_result_fail("short listmount: %s\n",
strerror(errno));
@@ -577,18 +646,18 @@ int main(void)
int ret;
uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
- STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE;
+ STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID;
ksft_print_header();
- ret = statmount(0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace();
- ksft_set_plan(14);
+ ksft_set_plan(15);
test_listmount_empty_root();
test_statmount_zero_mask();
test_statmount_mnt_basic();
@@ -596,6 +665,7 @@ int main(void)
test_statmount_mnt_root();
test_statmount_mnt_point();
test_statmount_fs_type();
+ test_statmount_mnt_opts();
test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root");
test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point");
test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type");
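For reference, the field walking in test_statmount_mnt_opts() above follows the documented /proc/<pid>/mountinfo layout. A representative line (fields: mount ID, parent ID, major:minor, root, mount point, per-mount options, optional fields, '-' separator, fs type, source, super-block options):

	36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue

The parser skips to the super-block options after the '-' separator, strips the generic flags (ro/rw, sync, dirsync, lazytime), and compares what remains against the STATMOUNT_MNT_OPTS string.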
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
new file mode 100644
index 000000000000..e044f5fc57fd
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <linux/nsfs.h>
+#include <linux/stat.h>
+
+#include "statmount.h"
+#include "../../kselftest.h"
+
+#define NSID_PASS 0
+#define NSID_FAIL 1
+#define NSID_SKIP 2
+#define NSID_ERROR 3
+
+static void handle_result(int ret, const char *testname)
+{
+ if (ret == NSID_PASS)
+ ksft_test_result_pass("%s\n", testname);
+ else if (ret == NSID_FAIL)
+ ksft_test_result_fail("%s\n", testname);
+ else if (ret == NSID_ERROR)
+ ksft_exit_fail_msg("%s\n", testname);
+ else
+ ksft_test_result_skip("%s\n", testname);
+}
+
+static inline int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
+ return -1;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg(
+ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+ WIFSIGNALED(status), WTERMSIG(status));
+ return -1;
+ }
+
+ ret = WEXITSTATUS(status);
+ return ret;
+}
+
+static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
+{
+ int fd = open(mnt_ns, O_RDONLY);
+
+ if (fd < 0) {
+ ksft_print_msg("failed to open for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ sleep(60);
+ return NSID_ERROR;
+ }
+
+ if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) {
+ ksft_print_msg("failed to get the nsid for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ return NSID_ERROR;
+ }
+ close(fd);
+ return NSID_PASS;
+}
+
+static int get_mnt_id(const char *path, uint64_t *mnt_id)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return NSID_ERROR;
+ }
+
+ *mnt_id = sx.stx_mnt_id;
+ return NSID_PASS;
+}
+
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return NSID_ERROR;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return NSID_ERROR;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return NSID_ERROR;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return NSID_ERROR;
+ }
+
+ return NSID_PASS;
+}
+
+static int setup_namespace(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ if (ret == -1)
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret != NSID_PASS)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret != NSID_PASS)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret == -1) {
+ ksft_print_msg("making mount tree private: %s\n",
+ strerror(errno));
+ return NSID_ERROR;
+ }
+
+ return NSID_PASS;
+}
+
+static int _test_statmount_mnt_ns_id(void)
+{
+ struct statmount sm;
+ uint64_t mnt_ns_id;
+ uint64_t root_id;
+ int ret;
+
+ ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = get_mnt_id("/", &root_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.size != sizeof(sm)) {
+ ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+ (uint32_t)sizeof(sm));
+ return NSID_FAIL;
+ }
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+
+ return NSID_PASS;
+}
+
+static void test_statmount_mnt_ns_id(void)
+{
+ pid_t pid;
+ int ret;
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ /* We're the original pid, wait for the result. */
+ if (pid != 0) {
+ ret = wait_for_pid(pid);
+ handle_result(ret, "test statmount ns id");
+ return;
+ }
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+ ret = _test_statmount_mnt_ns_id();
+ exit(ret);
+}
+
+static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
+{
+ uint64_t list[256];
+ uint64_t mnt_ns_id;
+ uint64_t nr_mounts;
+ char buf[256];
+ int ret;
+
+ /* Get the mount ns id for our child. */
+ snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid);
+	ret = get_mnt_ns_id(buf, &mnt_ns_id);
+	if (ret != NSID_PASS)
+		return ret;
+
+ nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (nr_mounts != child_nr_mounts) {
+ ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts,
+ child_nr_mounts);
+ return NSID_FAIL;
+ }
+
+ /* Validate that all of our entries match our mnt_ns_id. */
+ for (int i = 0; i < nr_mounts; i++) {
+ struct statmount sm;
+
+ ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
+ sizeof(sm), 0);
+ if (ret < 0) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+ }
+
+ return NSID_PASS;
+}
+
+static void test_listmount_ns(void)
+{
+ uint64_t nr_mounts;
+ char pval;
+ int child_ready_pipe[2];
+ int parent_ready_pipe[2];
+ pid_t pid;
+ int ret, child_ret;
+
+ if (pipe(child_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the child pipe: %s\n",
+ strerror(errno));
+ if (pipe(parent_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the parent pipe: %s\n",
+ strerror(errno));
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ if (pid == 0) {
+ char cval;
+ uint64_t list[256];
+
+ close(child_ready_pipe[0]);
+ close(parent_ready_pipe[1]);
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+
+ nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ exit(NSID_FAIL);
+ }
+
+ /*
+ * Tell our parent how many mounts we have, and then wait for it
+ * to tell us we're done.
+ */
+ write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts));
+ read(parent_ready_pipe[0], &cval, sizeof(cval));
+ exit(NSID_PASS);
+ }
+
+ close(child_ready_pipe[1]);
+ close(parent_ready_pipe[0]);
+
+ /* Wait until the child has created everything. */
+	if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) !=
+	    sizeof(nr_mounts))
+		ret = NSID_ERROR;
+	else
+		ret = validate_external_listmount(pid, nr_mounts);
+
+ if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval))
+ ret = NSID_ERROR;
+
+ child_ret = wait_for_pid(pid);
+ if (child_ret != NSID_PASS)
+ ret = child_ret;
+ handle_result(ret, "test listmount ns id");
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+ ret = statmount(0, 0, 0, NULL, 0, 0);
+ assert(ret == -1);
+ if (errno == ENOSYS)
+ ksft_exit_skip("statmount() syscall not supported\n");
+
+ ksft_set_plan(2);
+ test_statmount_mnt_ns_id();
+ test_listmount_ns();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index b634969cbb6f..40723a6a083f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -66,8 +66,6 @@
#include <sys/wait.h>
#include <unistd.h>
#include <setjmp.h>
-#include <syscall.h>
-#include <linux/sched.h>
#include "kselftest.h"
@@ -82,17 +80,6 @@
# define TH_LOG_ENABLED 1
#endif
-/* Wait for the child process to end but without sharing memory mapping. */
-static inline pid_t clone3_vfork(void)
-{
- struct clone_args args = {
- .flags = CLONE_VFORK,
- .exit_signal = SIGCHLD,
- };
-
- return syscall(__NR_clone3, &args, sizeof(args));
-}
-
/**
* TH_LOG()
*
@@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void)
} \
if (setjmp(_metadata->env) == 0) { \
/* _metadata and potentially self are shared with all forks. */ \
- child = clone3_vfork(); \
+ child = fork(); \
if (child == 0) { \
fixture_name##_setup(_metadata, self, variant->data); \
/* Let setup failure terminate early. */ \
@@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t)
.sa_flags = SA_SIGINFO,
};
struct sigaction saved_action;
- int status;
+ /*
+ * Sets status so that WIFEXITED(status) returns true and
+ * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
+ * should never be evaluated because of the waitpid(2) check and
+ * SIGALRM handling.
+ */
+ int status = KSFT_FAIL << 8;
+ int child;
if (sigaction(SIGALRM, &action, &saved_action)) {
t->exit_code = KSFT_FAIL;
@@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t)
__active_test = t;
t->timed_out = false;
alarm(t->timeout);
- waitpid(t->pid, &status, 0);
+ child = waitpid(t->pid, &status, 0);
+ if (child == -1 && errno != EINTR) {
+ t->exit_code = KSFT_FAIL;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
+ return;
+ }
+
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
t->exit_code = KSFT_FAIL;
@@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t)
WTERMSIG(status));
}
} else {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test ended in some other way [%u]\n",
t->name,
@@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f,
struct __test_xfail *xfail;
char test_name[1024];
const char *diagnostic;
+ int child;
/* reset test struct */
t->exit_code = KSFT_PASS;
@@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f,
fflush(stdout);
fflush(stderr);
- t->pid = clone3_vfork();
- if (t->pid < 0) {
+ child = fork();
+ if (child < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->exit_code = KSFT_FAIL;
- } else if (t->pid == 0) {
+ } else if (child == 0) {
setpgrp();
t->fn(t, variant);
_exit(t->exit_code);
} else {
+ t->pid = child;
__wait_for_test(t);
}
ksft_print_msg(" %4s %s\n",
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 8eb57de0b587..c0c7c1fe93f9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -277,6 +277,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 14ee17151a59..b5035c63d516 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -9,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "sbi.h"
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index c664e446136b..594b061aef52 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1247,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
unsigned long ht_gfn, max_gfn, max_pfn;
- uint8_t maxphyaddr;
+ uint8_t maxphyaddr, guest_maxphyaddr;
- max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+ /*
+ * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
+ * enumerates the max _mappable_ GPA, which can be less than the raw
+ * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+ * doesn't support 5-level TDP.
+ */
+ guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+ guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+ TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+ "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
+
+ max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
/* Avoid reserved HyperTransport region on AMD processors. */
if (!host_cpu_is_amd)
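The resulting bound is plain bit arithmetic: with guest MAXPHYADDR bits of physical address space and pages of 2^page_shift bytes, the largest frame number is 2^(MAXPHYADDR - page_shift) - 1. A standalone sanity check, with values assumed purely for illustration:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		unsigned int guest_maxphyaddr = 40;	/* bits of guest physical address space */
		unsigned int page_shift = 12;		/* 4 KiB pages */
		uint64_t max_gfn = (1ULL << (guest_maxphyaddr - page_shift)) - 1;

		/* 2^(40-12) - 1 == 2^28 - 1 frames of 4 KiB each. */
		assert(max_gfn == (1ULL << 28) - 1);
		return 0;
	}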
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index 823c132069b4..0e0712854953 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -6,6 +6,7 @@
*
*/
#include "kvm_util.h"
+#include "ucall_common.h"
#define LABEL_ADDRESS(v) ((uint64_t)&(v))
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index 69bb94e6b227..f299cbfd23ca 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -15,6 +15,7 @@
#include "processor.h"
#include "sbi.h"
#include "arch_timer.h"
+#include "ucall_common.h"
/* Maximum counters(firmware + hardware) */
#define RISCV_MAX_PMU_COUNTERS 64
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
index 7a4a61be119b..3fb967f40c6a 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
@@ -105,11 +105,11 @@ void test_features(uint32_t vm_type, uint64_t supported_features)
int i;
for (i = 0; i < 64; i++) {
- if (!(supported_features & (1u << i)))
+ if (!(supported_features & BIT_ULL(i)))
test_init2_invalid(vm_type,
&(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
"unknown feature");
- else if (KNOWN_FEATURES & (1u << i))
+ else if (KNOWN_FEATURES & BIT_ULL(i))
test_init2(vm_type,
&(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
}
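The switch from `1u << i` to BIT_ULL(i) above is not cosmetic: shifting a 32-bit `1u` by 32..63 is undefined behaviour, so the old loop could never test the upper half of the 64-bit feature mask. A minimal illustration:

	#include <stdio.h>
	#include <stdint.h>

	#define BIT_ULL(n)	(1ULL << (n))

	int main(void)
	{
		uint64_t features = BIT_ULL(40);	/* a feature bit above bit 31 */

		/* Broken: (1u << 40) shifts a 32-bit value by more than its
		 * width, which is undefined behaviour, and a 32-bit constant
		 * cannot represent bits 32..63 in any case. */

		/* Correct: widen to 64 bits before shifting. */
		if (features & BIT_ULL(40))
			printf("bit 40 is set\n");
		return 0;
	}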
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 37de82da9be7..b61803e36d1c 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -656,12 +656,33 @@ unmap:
munmap(map, size);
}
+static void init_global_file_handles(void)
+{
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
+ ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
+ if (ksm_full_scans_fd < 0)
+ ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
+ proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
+ proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
+ O_RDONLY);
+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+}
+
int main(int argc, char **argv)
{
unsigned int tests = 8;
int err;
if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
+ init_global_file_handles();
exit(test_child_ksm());
}
@@ -674,22 +695,7 @@ int main(int argc, char **argv)
pagesize = getpagesize();
- mem_fd = open("/proc/self/mem", O_RDWR);
- if (mem_fd < 0)
- ksft_exit_fail_msg("opening /proc/self/mem failed\n");
- ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
- if (ksm_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
- ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
- if (ksm_full_scans_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
- pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
- if (pagemap_fd < 0)
- ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
- proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
- proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
- O_RDONLY);
- ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+ init_global_file_handles();
test_unmerge();
test_unmerge_zero_pages();
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index b74813fdc951..d53de2486080 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -67,7 +67,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error: munmap failed!?\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
addr = base_addr + page_size;
size = 3 * page_size;
@@ -76,7 +77,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n");
/*
* Exact same mapping again:
@@ -93,7 +95,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
/*
* Second mapping contained within first:
@@ -111,7 +114,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n");
/*
* Overlap end of existing mapping:
@@ -128,7 +132,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n");
/*
* Overlap start of existing mapping:
@@ -145,7 +150,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n");
/*
* Adjacent to start of existing mapping:
@@ -162,7 +168,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base\n");
/*
* Adjacent to end of existing mapping:
@@ -179,7 +186,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n");
addr = base_addr;
size = 5 * page_size;
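All of the pass/fail expectations above follow from MAP_FIXED_NOREPLACE semantics: the mapping is placed exactly at addr, but fails with EEXIST instead of clobbering an existing mapping. A minimal standalone probe (illustrative, assuming a libc that exposes MAP_FIXED_NOREPLACE):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		size_t sz = getpagesize();
		void *a, *b;

		a = mmap(NULL, sz, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (a == MAP_FAILED)
			return 1;

		/* Same range again: must fail with EEXIST, not move or replace. */
		b = mmap(a, sz, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
		printf("second mmap: %p (%s)\n", b,
		       b == MAP_FAILED ? strerror(errno) : "unexpectedly succeeded");

		if (b != MAP_FAILED)
			munmap(b, sz);
		munmap(a, sz);
		return 0;
	}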
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 49a56eb5d036..666ab7d9390b 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -43,7 +43,6 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
-test_unix_oob
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index bd01e4a0be2c..d9393569d03a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_flavors_test.sh
+TEST_PROGS += srv6_end_dx4_netfilter_test.sh
+TEST_PROGS += srv6_end_dx6_netfilter_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 3b83c797650d..50584479540b 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights
+TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
new file mode 100644
index 000000000000..37368567768c
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/config
@@ -0,0 +1,3 @@
+CONFIG_UNIX=y
+CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
new file mode 100644
index 000000000000..16d0c172eaeb
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+#define BUF_SZ 32
+
+FIXTURE(msg_oob)
+{
+ int fd[4]; /* 0: AF_UNIX sender
+ * 1: AF_UNIX receiver
+ * 2: TCP sender
+ * 3: TCP receiver
+ */
+ int signal_fd;
+ int epoll_fd[2]; /* 0: AF_UNIX receiver
+ * 1: TCP receiver
+ */
+ bool tcp_compliant;
+};
+
+FIXTURE_VARIANT(msg_oob)
+{
+ bool peek;
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, no_peek)
+{
+ .peek = false,
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, peek)
+{
+ .peek = true
+};
+
+static void create_unix_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int ret;
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(ret, 0);
+}
+
+static void create_tcp_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+ int listen_fd;
+ int ret;
+
+ listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(listen_fd, 0);
+
+ ret = listen(listen_fd, -1);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[2] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd[2], 0);
+
+ ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GE(self->fd[3], 0);
+
+ ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK);
+ ASSERT_EQ(ret, 0);
+}
+
+static void setup_sigurg(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct signalfd_siginfo siginfo;
+ int pid = getpid();
+ sigset_t mask;
+ int i, ret;
+
+ for (i = 0; i < 2; i++) {
+ ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ret = sigemptyset(&mask);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigaddset(&mask, SIGURG);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK);
+ ASSERT_GE(self->signal_fd, 0);
+
+ ret = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ ASSERT_EQ(ret, -1);
+}
+
+static void setup_epollpri(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct epoll_event event = {
+ .events = EPOLLPRI,
+ };
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ self->epoll_fd[i] = epoll_create1(0);
+ ASSERT_GE(self->epoll_fd[i], 0);
+
+ ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void close_sockets(FIXTURE_DATA(msg_oob) *self)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ close(self->fd[i]);
+}
+
+FIXTURE_SETUP(msg_oob)
+{
+ create_unix_socketpair(_metadata, self);
+ create_tcp_socketpair(_metadata, self);
+
+ setup_sigurg(_metadata, self);
+ setup_epollpri(_metadata, self);
+
+ self->tcp_compliant = true;
+}
+
+FIXTURE_TEARDOWN(msg_oob)
+{
+ close_sockets(self);
+}
+
+static void __epollpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_remaining)
+{
+ struct epoll_event event[2] = {};
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++)
+ ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0);
+
+ ASSERT_EQ(ret[0], oob_remaining);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(ret[0], ret[1]);
+
+ if (oob_remaining) {
+ ASSERT_EQ(event[0].events, EPOLLPRI);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(event[0].events, event[1].events);
+ }
+}
+
+static void __sendpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *buf, size_t len, int flags)
+{
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++) {
+ struct signalfd_siginfo siginfo = {};
+ int bytes;
+
+ ret[i] = send(self->fd[i * 2], buf, len, flags);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+
+ if (flags & MSG_OOB) {
+ ASSERT_EQ(bytes, sizeof(siginfo));
+ ASSERT_EQ(siginfo.ssi_signo, SIGURG);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ }
+
+ ASSERT_EQ(bytes, -1);
+ }
+
+ ASSERT_EQ(ret[0], len);
+ ASSERT_EQ(ret[0], ret[1]);
+}
+
+static void __recvpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *expected_buf, int expected_len,
+ int buf_len, int flags)
+{
+ int i, ret[2], recv_errno[2], expected_errno = 0;
+ char recv_buf[2][BUF_SZ] = {};
+ bool printed = false;
+
+ ASSERT_GE(BUF_SZ, buf_len);
+
+ errno = 0;
+
+ for (i = 0; i < 2; i++) {
+ ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ recv_errno[i] = errno;
+ }
+
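+	/* A negative expected_len encodes the expected errno as -errno. */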
+ if (expected_len < 0) {
+ expected_errno = -expected_len;
+ expected_len = -1;
+ }
+
+ if (ret[0] != expected_len || recv_errno[0] != expected_errno) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(ret[0], expected_len);
+ ASSERT_EQ(recv_errno[0], expected_errno);
+ }
+
+ if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+
+ printed = true;
+
+ if (self->tcp_compliant) {
+ ASSERT_EQ(ret[0], ret[1]);
+ ASSERT_EQ(recv_errno[0], recv_errno[1]);
+ }
+ }
+
+ if (expected_len >= 0) {
+ int cmp;
+
+ cmp = strncmp(expected_buf, recv_buf[0], expected_len);
+ if (cmp) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(cmp, 0);
+ }
+
+ cmp = strncmp(recv_buf[0], recv_buf[1], expected_len);
+ if (cmp) {
+ if (!printed) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+ }
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(cmp, 0);
+ }
+ }
+}
+
+static void __setinlinepair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int i, oob_inline = 1;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE,
+ &oob_inline, sizeof(oob_inline));
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void __siocatmarkpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_head)
+{
+ int answ[2] = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ASSERT_EQ(answ[0], oob_head);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(answ[0], answ[1]);
+}
+
+#define sendpair(buf, len, flags) \
+ __sendpair(_metadata, self, buf, len, flags)
+
+#define recvpair(expected_buf, expected_len, buf_len, flags) \
+ do { \
+ if (variant->peek) \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, \
+ buf_len, (flags) | MSG_PEEK); \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, buf_len, flags); \
+ } while (0)
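+
+/* When the variant enables peeking, each receive is first performed with
+ * MSG_PEEK to check that peeking returns the same result without consuming
+ * data, then repeated for real.
+ */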
+
+#define epollpair(oob_remaining) \
+ __epollpair(_metadata, self, oob_remaining)
+
+#define siocatmarkpair(oob_head) \
+ __siocatmarkpair(_metadata, self, oob_head)
+
+#define setinlinepair() \
+ __setinlinepair(_metadata, self)
+
+#define tcp_incompliant \
+ for (self->tcp_compliant = false; \
+ self->tcp_compliant == false; \
+ self->tcp_compliant = true)
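+
+/* The for-loop runs the following statement or block exactly once with
+ * self->tcp_compliant cleared, then restores it, so assertions wrapped in
+ * tcp_incompliant { ... } check the AF_UNIX behaviour only where TCP is
+ * known to diverge.
+ */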
+
+TEST_F(msg_oob, non_oob)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 5, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_break_drop)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("ld", 2, 2, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_drop_2)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+ }
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ tcp_incompliant {
+ recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */
+ }
+
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("d", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob)
+{
+ setinlinepair();
+
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_break)
+{
+ setinlinepair();
+
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */
+ }
+
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("rld", 3, 3, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_no_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ setinlinepair();
+
+ sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ setinlinepair();
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP recv()s "y". */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(false);
+ }
+}
+
+TEST_F(msg_oob, inline_ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index 2bfed46e0b19..d66336256580 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -14,12 +14,12 @@
FIXTURE(scm_rights)
{
- int fd[16];
+ int fd[32];
};
FIXTURE_VARIANT(scm_rights)
{
- char name[16];
+ char name[32];
int type;
int flags;
bool test_listener;
@@ -172,6 +172,8 @@ static void __create_sockets(struct __test_metadata *_metadata,
const FIXTURE_VARIANT(scm_rights) *variant,
int n)
{
+ ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
+
if (variant->test_listener)
create_listeners(_metadata, self, n);
else
@@ -283,4 +285,23 @@ TEST_F(scm_rights, cross_edge)
close_sockets(8);
}
+TEST_F(scm_rights, backtrack_from_scc)
+{
+ create_sockets(10);
+
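+	/* fds 1-3 form a cycle (an SCC) reachable from 0, with an extra
+	 * edge 0 -> 4 outside the cycle; the same shape is repeated on
+	 * fds 5-9.  This exercises the GC backtracking out of an SCC.
+	 */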
+ send_fd(0, 1);
+ send_fd(0, 4);
+ send_fd(1, 2);
+ send_fd(2, 3);
+ send_fd(3, 1);
+
+ send_fd(5, 6);
+ send_fd(5, 9);
+ send_fd(6, 7);
+ send_fd(7, 8);
+ send_fd(8, 6);
+
+ close_sockets(10);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
deleted file mode 100644
index a7c51889acd5..000000000000
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ /dev/null
@@ -1,436 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <netinet/tcp.h>
-#include <sys/un.h>
-#include <sys/signal.h>
-#include <sys/poll.h>
-
-static int pipefd[2];
-static int signal_recvd;
-static pid_t producer_id;
-static char sock_name[32];
-
-static void sig_hand(int sn, siginfo_t *si, void *p)
-{
- signal_recvd = sn;
-}
-
-static int set_sig_handler(int signal)
-{
- struct sigaction sa;
-
- sa.sa_sigaction = sig_hand;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_SIGINFO | SA_RESTART;
-
- return sigaction(signal, &sa, NULL);
-}
-
-static void set_filemode(int fd, int set)
-{
- int flags = fcntl(fd, F_GETFL, 0);
-
- if (set)
- flags &= ~O_NONBLOCK;
- else
- flags |= O_NONBLOCK;
- fcntl(fd, F_SETFL, flags);
-}
-
-static void signal_producer(int fd)
-{
- char cmd;
-
- cmd = 'S';
- write(fd, &cmd, sizeof(cmd));
-}
-
-static void wait_for_signal(int fd)
-{
- char buf[5];
-
- read(fd, buf, 5);
-}
-
-static void die(int status)
-{
- fflush(NULL);
- unlink(sock_name);
- kill(producer_id, SIGTERM);
- exit(status);
-}
-
-int is_sioctatmark(int fd)
-{
- int ans = -1;
-
- if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
-#ifdef DEBUG
- perror("SIOCATMARK Failed");
-#endif
- }
- return ans;
-}
-
-void read_oob(int fd, char *c)
-{
-
- *c = ' ';
- if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
-#ifdef DEBUG
- perror("Reading MSG_OOB Failed");
-#endif
- }
-}
-
-int read_data(int pfd, char *buf, int size)
-{
- int len = 0;
-
- memset(buf, size, '0');
- len = read(pfd, buf, size);
-#ifdef DEBUG
- if (len < 0)
- perror("read failed");
-#endif
- return len;
-}
-
-static void wait_for_data(int pfd, int event)
-{
- struct pollfd pfds[1];
-
- pfds[0].fd = pfd;
- pfds[0].events = event;
- poll(pfds, 1, -1);
-}
-
-void producer(struct sockaddr_un *consumer_addr)
-{
- int cfd;
- char buf[64];
- int i;
-
- memset(buf, 'x', sizeof(buf));
- cfd = socket(AF_UNIX, SOCK_STREAM, 0);
-
- wait_for_signal(pipefd[0]);
- if (connect(cfd, (struct sockaddr *)consumer_addr,
- sizeof(*consumer_addr)) != 0) {
- perror("Connect failed");
- kill(0, SIGTERM);
- exit(1);
- }
-
- for (i = 0; i < 2; i++) {
- /* Test 1: Test for SIGURG and OOB */
- wait_for_signal(pipefd[0]);
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 2: Test for OOB being overwitten */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '#';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 3: Test for SIOCATMARK */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- send(cfd, buf, sizeof(buf), 0);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 4: Test for 1byte OOB msg */
- memset(buf, 'x', sizeof(buf));
- buf[0] = '@';
- send(cfd, buf, 1, MSG_OOB);
- }
-}
-
-int
-main(int argc, char **argv)
-{
- int lfd, pfd;
- struct sockaddr_un consumer_addr, paddr;
- socklen_t len = sizeof(consumer_addr);
- char buf[1024];
- int on = 0;
- char oob;
- int atmark;
-
- lfd = socket(AF_UNIX, SOCK_STREAM, 0);
- memset(&consumer_addr, 0, sizeof(consumer_addr));
- consumer_addr.sun_family = AF_UNIX;
- sprintf(sock_name, "unix_oob_%d", getpid());
- unlink(sock_name);
- strcpy(consumer_addr.sun_path, sock_name);
-
- if ((bind(lfd, (struct sockaddr *)&consumer_addr,
- sizeof(consumer_addr))) != 0) {
- perror("socket bind failed");
- exit(1);
- }
-
- pipe(pipefd);
-
- listen(lfd, 1);
-
- producer_id = fork();
- if (producer_id == 0) {
- producer(&consumer_addr);
- exit(0);
- }
-
- set_sig_handler(SIGURG);
- signal_producer(pipefd[1]);
-
- pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
- fcntl(pfd, F_SETOWN, getpid());
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1:
- * veriyf that SIGURG is
- * delivered, 63 bytes are
- * read, oob is '@', and POLLPRI works.
- */
- wait_for_data(pfd, POLLPRI);
- read_oob(pfd, &oob);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 63 || oob != '@') {
- fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2:
- * Verify that the first OOB is over written by
- * the 2nd one and the first OOB is returned as
- * part of the read, and sigurg is received.
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = 0;
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- read_oob(pfd, &oob);
- if (!signal_recvd || len != 127 || oob != '#') {
- fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and atmark
- * is set.
- * oob is '%' and second read returns
- * 64 bytes.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 150)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr,
- "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
- signal_recvd, len, oob, atmark);
- die(1);
- }
-
- signal_recvd = 0;
-
- len = read_data(pfd, buf, 1024);
- if (len != 64) {
- fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4:
- * verify that a single byte
- * oob message is delivered.
- * set non blocking mode and
- * check proper error is
- * returned and sigurg is
- * received and correct
- * oob is read.
- */
-
- set_filemode(pfd, 0);
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if ((len == -1) && (errno == 11))
- len = 0;
-
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 0 || oob != '@') {
- fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- set_filemode(pfd, 1);
-
- /* Inline Testing */
-
- on = 1;
- if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
- perror("SO_OOBINLINE");
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1 -- Inline:
- * Check that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
- */
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
-
- if (!signal_recvd || len != 63) {
- fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
- signal_recvd, len);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
-
- if (len != 1) {
- fprintf(stderr,
- "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2 -- Inline:
- * Verify that the first OOB is over written by
- * the 2nd one and read breaks correctly on
- * 2nd OOB boundary with the first OOB returned as
- * part of the read, and sigurg is delivered and
- * siocatmark returns true.
- * next read returns one byte, the oob byte
- * and siocatmark returns false.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 127 || atmark != 1 || !signal_recvd) {
- fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
- len, atmark);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 1 || buf[0] != '#' || atmark == 1) {
- fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3 -- Inline:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and siocatmark
- * is true after the read.
- * subsequent read returns 65 bytes
- * because of oob which should be '%'.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 126)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (!signal_recvd || len != 127 || !atmark) {
- fprintf(stderr,
- "Test 3 Inline failed, sigurg %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 65 || buf[0] != '%' || atmark != 0) {
- fprintf(stderr,
- "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4 -- Inline:
- * verify that a single
- * byte oob message is delivered
- * and read returns one byte, the oob
- * byte and sigurg is received
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 1 || buf[0] != '@') {
- fprintf(stderr,
- "Test 4 Inline failed, signal %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
- die(0);
-}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 04de7a6ba6f3..d4891f7a2bfa 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -101,3 +101,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 9e2981f2d7f5..9cb05978269d 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -160,10 +160,12 @@ make_connection()
local is_v6=$1
local app_port=$app4_port
local connect_addr="10.0.1.1"
+ local client_addr="10.0.1.2"
local listen_addr="0.0.0.0"
if [ "$is_v6" = "v6" ]
then
connect_addr="dead:beef:1::1"
+ client_addr="dead:beef:1::2"
listen_addr="::"
app_port=$app6_port
else
@@ -206,6 +208,7 @@ make_connection()
[ "$server_serverside" = 1 ]
then
test_pass
+ print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
mptcp_lib_result_print_all_tap
@@ -297,7 +300,7 @@ test_announce()
ip netns exec "$ns2"\
./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
ns2eth1
- print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port"
+ print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port"
sleep 0.5
verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \
"$client4_port"
@@ -306,7 +309,7 @@ test_announce()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl ann\
dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1
- print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port"
+ print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port"
sleep 0.5
verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
"$client_addr_id" "$client6_port" "v6"
@@ -316,7 +319,7 @@ test_announce()
client_addr_id=$((client_addr_id+1))
ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
$client_addr_id dev ns2eth1 port $new4_port
- print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port"
+ print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port"
sleep 0.5
verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
"$client_addr_id" "$new4_port"
@@ -327,7 +330,7 @@ test_announce()
# ADD_ADDR from the server to client machine reusing the subflow port
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
$server_addr_id dev ns1eth2
- print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+ print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$app4_port"
@@ -336,7 +339,7 @@ test_announce()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
$server_addr_id dev ns1eth2
- print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port"
+ print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
"$server_addr_id" "$app6_port" "v6"
@@ -346,7 +349,7 @@ test_announce()
server_addr_id=$((server_addr_id+1))
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
$server_addr_id dev ns1eth2 port $new4_port
- print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port"
+ print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$new4_port"
@@ -380,7 +383,7 @@ test_remove()
local invalid_token=$(( client4_token - 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
$client_addr_id > /dev/null 2>&1
- print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
+ print_test "RM_ADDR id:client ns2 => ns1, invalid token"
local type
type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
@@ -394,7 +397,7 @@ test_remove()
local invalid_id=$(( client_addr_id + 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$invalid_id > /dev/null 2>&1
- print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
+ print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id"
type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
then
@@ -407,7 +410,7 @@ test_remove()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$client_addr_id
- print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+ print_test "RM_ADDR id:client ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
@@ -416,7 +419,7 @@ test_remove()
client_addr_id=$(( client_addr_id - 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$client_addr_id
- print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+ print_test "RM_ADDR id:client-1 ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
@@ -424,7 +427,7 @@ test_remove()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
$client_addr_id
- print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1"
+ print_test "RM_ADDR6 id:client-1 ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id"
@@ -434,7 +437,7 @@ test_remove()
# RM_ADDR from the server to client machine
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
$server_addr_id
- print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+ print_test "RM_ADDR id:server ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
@@ -443,7 +446,7 @@ test_remove()
server_addr_id=$(( server_addr_id - 1 ))
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
$server_addr_id
- print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+ print_test "RM_ADDR id:server-1 ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
@@ -451,7 +454,7 @@ test_remove()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
$server_addr_id
- print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2"
+ print_test "RM_ADDR6 id:server-1 ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id"
}
@@ -479,8 +482,14 @@ verify_subflow_events()
local locid
local remid
local info
+ local e_dport_txt
- info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
+ # only display the fixed ports
+ if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then
+ e_dport_txt=":${e_dport}"
+ fi
+
+ info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})"
if [ "$e_type" = "$SUB_ESTABLISHED" ]
then
@@ -766,7 +775,7 @@ test_subflows_v4_v6_mix()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\
$server_addr_id dev ns1eth2
- print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+ print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\
"$server_addr_id" "$app6_port"
@@ -861,7 +870,7 @@ test_listener()
local listener_pid=$!
sleep 0.5
- print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
+ print_test "CREATE_LISTENER 10.0.2.2 (client port)"
verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
# ADD_ADDR from client to server machine reusing the subflow port
@@ -878,13 +887,14 @@ test_listener()
mptcp_lib_kill_wait $listener_pid
sleep 0.5
- print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
+ print_test "CLOSE_LISTENER 10.0.2.2 (client port)"
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
print_title "Make connections"
make_connection
make_connection "v6"
+print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)"
test_announce
test_remove
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index bdc03a2097e8..7ea5fb28c93d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -85,6 +85,7 @@ static bool cfg_rx;
static int cfg_runtime_ms = 4200;
static int cfg_verbose;
static int cfg_waittime_ms = 500;
+static int cfg_notification_limit = 32;
static bool cfg_zerocopy;
static socklen_t cfg_alen;
@@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
static int zerocopied = -1;
static uint32_t next_completion;
+static uint32_t sends_since_notify;
static unsigned long gettimeofday_ms(void)
{
@@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
error(1, errno, "send");
if (cfg_verbose && ret != len)
fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+ sends_since_notify++;
if (len) {
packets++;
@@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain)
/* Detect notification gaps. These should not happen often, if at all.
* Gaps can occur due to drops, reordering and retransmissions.
*/
- if (lo != next_completion)
+ if (cfg_verbose && lo != next_completion)
fprintf(stderr, "gap: %u..%u does not append to %u\n",
lo, hi, next_completion);
next_completion = hi + 1;
@@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain)
static void do_recv_completions(int fd, int domain)
{
while (do_recv_completion(fd, domain)) {}
+ sends_since_notify = 0;
}
/* Wait for all remaining completions on the errqueue */
@@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol)
else
do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+ if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
+ do_recv_completions(fd, domain);
+
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
do_recv_completions(fd, domain);
@@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv)
if (cfg_ifindex == 0)
error(1, errno, "invalid iface: %s", optarg);
break;
+ case 'l':
+ cfg_notification_limit = strtoul(optarg, NULL, 0);
+ break;
case 'm':
cfg_cork_mixed = true;
break;
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 5cae53543849..15bca0708717 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# OVS kernel module self tests
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 1dd057afd3fb..9f8dec2f6539 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -531,7 +531,7 @@ class ovsactions(nla):
for flat_act in parse_flat_map:
if parse_starts_block(actstr, flat_act[0], False):
actstr = actstr[len(flat_act[0]):]
- self["attrs"].append([flat_act[1]])
+ self["attrs"].append([flat_act[1], True])
actstr = actstr[strspn(actstr, ", ") :]
parsed = True
diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
new file mode 100755
index 000000000000..e23210aa547f
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
@@ -0,0 +1,335 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <[email protected]>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenant (named 100) offers
+# IPv4 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is handled by the router
+# rt-1, which acts as an ARP proxy.
+#
+# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface. rt-1 contains the
+# SRv6 Encap route that encapsulates the IPv4 packet in an outer IPv6 packet
+# carrying the Segment Routing Header (SRH). This packet is sent through the
+# (IPv6) core network up to the router rt-2, which receives it on its veth0
+# interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routes the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing, but this time the roles of
+# rt-1 and rt-2 are swapped.
+#
+# When net.netfilter.nf_hooks_lwtunnel is set to 1 on rt-1 or rt-2, and an
+# rpfilter iptables rule is added, SRv6 packets go through the netfilter
+# PREROUTING hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100                |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100|
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100                |
+# +---------------------------------------------------+
+#
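+# With the SIDs above, the encap/decap pair boils down to the following
+# ip(8) routes (see setup_vpn_config below for the parameterized version):
+#
+#   ip -4 route add 10.0.0.2/32 encap seg6 mode encap \
+#           segs fc00:12:100::6004 dev veth0              # on rt-1
+#   ip -6 route add fc00:12:100::6004/128 encap seg6local \
+#           action End.DX4 nh4 10.0.0.2 dev veth-t100     # on rt-2
+#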
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv4_HS_NETWORK=10.0.0
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+	# set the encap route on rtsrc for encapsulating packets which arrive
+	# from the host hssrc and are destined to the host hsdst.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+	# set the decap route on rtdst for decapsulating packets which arrive
+	# from the core network and are destined to the host hsdst.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
new file mode 100755
index 000000000000..9e69a2ed5bc3
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <[email protected]>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX6 behavior.
+#
+# Hereafter a network diagram is shown, where one tenant (named 100) offers
+# IPv6 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX6 behavior.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is handled by the router
+# rt-1, which acts as an NDP proxy.
+#
+# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface. rt-1 contains the
+# SRv6 Encap route that encapsulates the IPv6 packet in an outer IPv6 packet
+# carrying the Segment Routing Header (SRH). This packet is sent through the
+# (IPv6) core network up to the router rt-2, which receives it on its veth0
+# interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX6 behavior removes the outer IPv6+SRH headers and
+# routes the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing, but this time the roles of
+# rt-1 and rt-2 are swapped.
+#
+# When net.netfilter.nf_hooks_lwtunnel is set to 1 on rt-1 or rt-2, and an
+# rpfilter ip6tables rule is added, SRv6 packets go through the netfilter
+# PREROUTING hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t100                |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 |
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t100                |
+# +---------------------------------------------------+
+#
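+# With the SIDs above, the encap/decap pair boils down to the following
+# ip(8) routes (see setup_vpn_config below for the parameterized version):
+#
+#   ip -6 route add cafe::2/128 encap seg6 mode encap \
+#           segs fc00:12:100::6004 dev veth0              # on rt-1
+#   ip -6 route add fc00:12:100::6004/128 encap seg6local \
+#           action End.DX6 nh6 cafe::2 dev veth-t100      # on rt-2
+#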
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv6_HS_NETWORK=cafe
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+	# set the encap route on rtsrc for encapsulating packets which arrive
+	# from the host hssrc and are destined to the host hsdst.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+	# set the decap route on rtdst for decapsulating packets which arrive
+	# from the core network and are destined to the host hsdst.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+	# setup the IPv6 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
index 254d676a2689..185dc76ebb5f 100644
--- a/tools/testing/selftests/openat2/Makefile
+++ b/tools/testing/selftests/openat2/Makefile
@@ -1,8 +1,18 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
+LOCAL_HDRS += helpers.h
+
include ../lib.mk
-$(TEST_GEN_PROGS): helpers.c helpers.h
+$(TEST_GEN_PROGS): helpers.c
diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk
index b909bee3cb2a..abb9e58d95c4 100644
--- a/tools/testing/selftests/powerpc/flags.mk
+++ b/tools/testing/selftests/powerpc/flags.mk
@@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown")
export GIT_VERSION
endif
-ifeq ($(CFLAGS),)
-CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS)
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS)
export CFLAGS
-endif
-
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index c7686fb6641a..55315ed695f4 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -291,11 +291,30 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
return ret;
}
+static bool arch_supports_noncont_cat(const struct resctrl_test *test)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ /* AMD always supports non-contiguous CBM. */
+ if (get_vendor() == ARCH_AMD)
+ return true;
+
+ /* Intel support for non-contiguous CBM needs to be discovered. */
+ if (!strcmp(test->resource, "L3"))
+ __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
+ else if (!strcmp(test->resource, "L2"))
+ __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
+ else
+ return false;
+
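+	/* CPUID.(EAX=0x10, ECX=resid): ECX bit 3 reports non-contiguous
+	 * CBM support.
+	 */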
+ return ((ecx >> 3) & 1);
+}
+
static int noncont_cat_run_test(const struct resctrl_test *test,
const struct user_params *uparams)
{
unsigned long full_cache_mask, cont_mask, noncont_mask;
- unsigned int eax, ebx, ecx, edx, sparse_masks;
+ unsigned int sparse_masks;
int bit_center, ret;
char schemata[64];
@@ -304,15 +323,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test,
if (ret)
return ret;
- if (!strcmp(test->resource, "L3"))
- __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
- else if (!strcmp(test->resource, "L2"))
- __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
- else
- return -EINVAL;
-
- if (sparse_masks != ((ecx >> 3) & 1)) {
- ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n");
+ if (arch_supports_noncont_cat(test) != sparse_masks) {
+ ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
return 1;
}
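The new helper centralizes the hardware probe: AMD always supports non-contiguous cache bit masks (CBMs), while Intel advertises support in CPUID leaf 0x10 (subleaf 1 for L3 CAT, subleaf 2 for L2 CAT), ECX bit 3. A minimal standalone probe along the same lines (hypothetical, Intel side only) could look like:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0x10 subleaf 1 = L3 CAT; use subleaf 2 for L2 CAT. */
	__cpuid_count(0x10, 1, eax, ebx, ecx, edx);
	printf("L3 non-contiguous CBM supported: %u\n", (ecx >> 3) & 1);
	return 0;
}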
diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
index 62397d5934f1..ed351a1cb917 100644
--- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c
+++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
@@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *))
asm(".option push \n\
.option arch, +v \n\
- vsetivli x0, 1, e32, ta, ma \n\
+ vsetivli x0, 1, e32, m1, ta, ma \n\
vmv.s.x v0, %1 \n\
# Generate SIGSEGV \n\
lw a0, 0(x0) \n\
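The one-line fix spells out the LMUL field: older GNU as accepted the short vtype form and defaulted to m1, but stricter assemblers (clang's integrated assembler in particular) reject vsetivli without an explicit LMUL. A minimal sketch of the full spelling (not from the patch, assumes a V-extension toolchain):

/* The vtypei operand of vsetivli carries SEW, LMUL and the tail/mask
 * policies; "e32, m1, ta, ma" selects 32-bit elements at LMUL=1,
 * tail-agnostic, mask-agnostic.
 */
static inline void vconfig_one_e32(void)
{
	asm volatile(".option push\n\t"
		     ".option arch, +v\n\t"
		     "vsetivli x0, 1, e32, m1, ta, ma\n\t"
		     ".option pop" ::: "memory");
}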
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index b83099160fbc..94886c82ae60 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -194,14 +194,14 @@ int main(int argc, char *argv[])
ksft_set_plan(7);
ksft_print_msg("Running on:\n");
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("uname -a");
ksft_print_msg("Current BPF sysctl settings:\n");
/* Avoid using "sysctl" which may not be installed. */
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("grep -H . /proc/sys/net/core/bpf_jit_enable");
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("grep -H . /proc/sys/net/core/bpf_jit_harden");
affinity();
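ksft_print_msg() is declared with a printf format attribute, so a literal "" argument trips gcc's -Wformat-zero-length (implied by -Wall via -Wformat); routing the empty string through "%s" keeps the bare "# " prefix ahead of the system() output without the warning. A hypothetical minimal reproduction:

#include <stdio.h>

int main(void)
{
	/* printf("");         warns: zero-length format string */
	printf("%s", "");   /* identical (empty) output, no warning */
	return 0;
}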
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index e40dc5be2f66..d12ff955de0d 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -30,7 +30,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
}
return 0;
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec, tst.tv_sec);
}
@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
/* Check that a child process is in the new timens. */
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
+ if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec + OFFSET, tst.tv_sec);
}
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 5e7f0051bd7b..5b939f59dfa4 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime");
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index 9edd43d6b2c1..a4196bbd6e33 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime(%d)", clockid);
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
index beb7614941fb..5b8907bf451d 100644
--- a/tools/testing/selftests/timens/vfork_exec.c
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -32,7 +32,7 @@ static void *tcheck(void *_args)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5) {
+ if (labs(tst.tv_sec - now->tv_sec) > 5) {
pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n",
args->tst_name, tst.tv_sec, now->tv_sec);
return (void *)1UL;
@@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5)
+ if (labs(tst.tv_sec - now->tv_sec) > 5)
return pr_fail("%s: unexpected value: %ld (%ld)\n",
tst_name, tst.tv_sec, now->tv_sec);
}
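All of the timens conversions above are the same bug: tv_sec is a long (and elapsed a long long), but abs() takes an int, so the difference was truncated before the comparison; clang's -Wabsolute-value flags exactly this pattern. A small illustration with hypothetical values (LP64 Linux assumed):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	long delta = 0x100000005L;		/* wider than int */

	printf("abs():  %d\n", abs(delta));	/* truncated to 5 on LP64 */
	printf("labs(): %ld\n", labs(delta));	/* 4294967301, as intended */
	return 0;
}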
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index d53a4d8008f9..98d8ba2afa00 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,35 +1,30 @@
# SPDX-License-Identifier: GPL-2.0
-include ../lib.mk
-
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
+TEST_GEN_PROGS := vdso_test_gettimeofday
+TEST_GEN_PROGS += vdso_test_getcpu
+TEST_GEN_PROGS += vdso_test_abi
+TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
-TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+TEST_GEN_PROGS += vdso_standalone_test_x86
endif
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
+TEST_GEN_PROGS += vdso_test_correctness
CFLAGS := -std=gnu99
-CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
-LDFLAGS_vdso_test_correctness := -ldl
+
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
+
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
- $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
- vdso_standalone_test_x86.c parse_vdso.c \
- -o $@
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS += -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
- $(CC) $(CFLAGS) \
- vdso_test_correctness.c \
- -o $@ \
- $(LDFLAGS_vdso_test_correctness)
+$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 413f75620a35..4ae417372e9e 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -55,14 +55,20 @@ static struct vdso_info
ELF(Verdef) *verdef;
} vdso_info;
-/* Straight from the ELF specification. */
-static unsigned long elf_hash(const unsigned char *name)
+/*
+ * Straight from the ELF specification...and then tweaked slightly, in order to
+ * avoid a few clang warnings.
+ */
+static unsigned long elf_hash(const char *name)
{
unsigned long h = 0, g;
- while (*name)
+ const unsigned char *uch_name = (const unsigned char *)name;
+
+ while (*uch_name)
{
- h = (h << 4) + *name++;
- if (g = h & 0xf0000000)
+ h = (h << 4) + *uch_name++;
+ g = h & 0xf0000000;
+ if (g)
h ^= g >> 24;
h &= ~g;
}
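Besides taking const char * (which avoids -Wpointer-sign at call sites), the rewrite splits the assignment out of the if condition, which clang's -Wparentheses would otherwise flag. The pattern in isolation (hypothetical helper, not from the patch):

#include <stdio.h>

static void demo(unsigned long h)
{
	unsigned long g;

	/* if (g = h & 0xf0000000)  -- clang: "using the result of an
	 * assignment as a condition without parentheses" */
	g = h & 0xf0000000;
	if (g)
		printf("high nibble set: %#lx\n", g);
}

int main(void)
{
	demo(0x12345678UL);
	return 0;
}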
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 8a44ff973ee1..27f6fdf11969 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -18,7 +18,7 @@
#include "parse_vdso.h"
-/* We need a libc functions... */
+/* We need some libc functions... */
int strcmp(const char *a, const char *b)
{
/* This implementation is buggy: it never returns -1. */
@@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b)
return 0;
}
+/*
+ * The clang build needs this, although gcc does not.
+ * Stolen from lib/string.c.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
/* ...and two syscalls. This is x86-specific. */
static inline long x86_syscall3(long nr, long a0, long a1, long a2)
{
@@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n)
}
}
-__attribute__((externally_visible)) void c_main(void **stack)
+void c_main(void **stack)
{
/* Parse the stack */
long argc = (long)*stack;
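Providing memcpy() here is not optional in a -nostdlib build: even in freestanding code the compiler is entitled to lower aggregate copies (and similar operations) into library calls to memcpy. A hypothetical illustration of why the definition is needed:

struct blob { char data[256]; };

/* At -O2 this assignment is typically emitted as "call memcpy", so a
 * freestanding binary must supply a definition, as the patch does. */
void copy_blob(struct blob *dst, const struct blob *src)
{
	*dst = *src;
}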
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index e95bd56b332f..35856b11c143 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu max -machine microvm -no-acpi
+QEMU_MACHINE := -cpu max -machine microvm,acpi=off
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
@@ -120,9 +120,9 @@ KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
+QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 86d267db87bb..7bc74969a819 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -55,6 +55,9 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
struct kvm_memory_slot *memslot;
int as_id, id;
+ if (!mask)
+ return;
+
as_id = slot >> 16;
id = (u16)slot;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 0f4e0cf4f158..747fe251e445 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -510,8 +510,10 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
}
if (folio_test_hwpoison(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
r = -EHWPOISON;
- goto out_unlock;
+ goto out_fput;
}
page = folio_file_page(folio, index);
@@ -522,7 +524,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
r = 0;
-out_unlock:
folio_unlock(folio);
out_fput:
fput(file);
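The hwpoison branch previously dropped the folio lock via the shared out_unlock label but leaked the folio reference; it now releases both inline and jumps straight to dropping the file reference. A userspace sketch of the rule being applied (names hypothetical): once an error branch jumps past the shared unlock label, it must perform the skipped cleanup itself, in reverse order of acquisition.

#include <errno.h>
#include <pthread.h>

struct item {
	pthread_mutex_t lock;
	int poisoned;
	int value;
};

static int read_item(struct item *it, int *out)
{
	int r = 0;

	pthread_mutex_lock(&it->lock);
	if (it->poisoned) {
		pthread_mutex_unlock(&it->lock);  /* out_done won't unlock */
		r = -EIO;
		goto out_done;
	}

	*out = it->value;
	pthread_mutex_unlock(&it->lock);
out_done:
	/* e.g. drop a file reference here, as out_fput does above */
	return r;
}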
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 14841acb8b95..1192942aef91 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -651,7 +651,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
range->on_lock(kvm);
if (IS_KVM_NULL_FN(range->handler))
- break;
+ goto mmu_unlock;
}
r.ret |= range->handler(kvm, &gfn_range);
}
@@ -660,6 +660,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
if (range->flush_on_ret && r.ret)
kvm_flush_remote_tlbs(kvm);
+mmu_unlock:
if (r.found_memslot)
KVM_MMU_UNLOCK(kvm);
@@ -4025,12 +4026,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
struct kvm *kvm = me->kvm;
struct kvm_vcpu *vcpu;
- int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
+ int last_boosted_vcpu;
unsigned long i;
int yielded = 0;
int try = 3;
int pass;
+ last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu);
kvm_vcpu_set_in_spin_loop(me, true);
/*
* We boost the priority of a VCPU that is runnable but not
@@ -4068,7 +4070,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
yielded = kvm_vcpu_yield_to(vcpu);
if (yielded > 0) {
- kvm->last_boosted_vcpu = i;
+ WRITE_ONCE(kvm->last_boosted_vcpu, i);
break;
} else if (yielded < 0) {
try--;
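last_boosted_vcpu is read and written by multiple vCPUs without a lock; the READ_ONCE()/WRITE_ONCE() pair adds no ordering, it just forbids the compiler from tearing, fusing, or re-reading the access and marks the race as intentional (e.g. for KCSAN). A userspace approximation of the same annotation, assuming GNU C:

/* Userspace stand-ins for the kernel macros. */
#define READ_ONCE(x)	 (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static int last_boosted;	/* racily shared between threads */

int pick_start(void)
{
	return READ_ONCE(last_boosted);
}

void record_winner(int i)
{
	WRITE_ONCE(last_boosted, i);
}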
@@ -4427,7 +4429,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_regs *kvm_regs;
r = -ENOMEM;
- kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
+ kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
if (!kvm_regs)
goto out;
r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
@@ -4454,8 +4456,7 @@ out_free1:
break;
}
case KVM_GET_SREGS: {
- kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
- GFP_KERNEL_ACCOUNT);
+ kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
r = -ENOMEM;
if (!kvm_sregs)
goto out;
@@ -4547,7 +4548,7 @@ out_free1:
break;
}
case KVM_GET_FPU: {
- fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
+ fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
r = -ENOMEM;
if (!fpu)
goto out;
@@ -6210,7 +6211,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
active = kvm_active_vms;
mutex_unlock(&kvm_lock);
- env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
+ env = kzalloc(sizeof(*env), GFP_KERNEL);
if (!env)
return;
@@ -6226,7 +6227,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
if (!IS_ERR(kvm->debugfs_dentry)) {
- char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
+ char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
if (p) {
tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
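The GFP_KERNEL_ACCOUNT to GFP_KERNEL conversions above all touch transient buffers freed before the ioctl (or uevent helper) returns; memcg accounting is meant for allocations that persist and scale with userspace-controlled state, so these short-lived objects can skip the accounting overhead. A kernel-style sketch of the rule (fill_regs() is a hypothetical helper; this is illustrative, not buildable as-is):

#include <linux/slab.h>
#include <linux/uaccess.h>

long get_regs_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
{
	struct kvm_regs *regs;
	long r = -ENOMEM;

	/* Transient: freed below, so plain GFP_KERNEL is enough. */
	regs = kzalloc(sizeof(*regs), GFP_KERNEL);
	if (!regs)
		return r;

	r = fill_regs(vcpu, regs);	/* hypothetical helper */
	if (!r && copy_to_user(argp, regs, sizeof(*regs)))
		r = -EFAULT;

	kfree(regs);
	return r;
}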