aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm64/silicon-errata.txt1
-rw-r--r--Documentation/cgroup-v2.txt11
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/emac.txt62
-rw-r--r--Documentation/networking/ip-sysctl.txt3
-rw-r--r--Documentation/virtual/kvm/api.txt378
-rw-r--r--Documentation/virtual/kvm/devices/s390_flic.txt41
-rw-r--r--Documentation/virtual/kvm/hypercalls.txt5
-rw-r--r--Makefile2
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_host.h2
-rw-r--r--arch/arm/include/uapi/asm/kvm.h2
-rw-r--r--arch/arm/kvm/arm.c6
-rw-r--r--arch/arm/kvm/handle_exit.c19
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig14
-rw-r--r--arch/arm64/include/asm/cpufeature.h2
-rw-r--r--arch/arm64/include/asm/kvm_host.h4
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h2
-rw-r--r--arch/arm64/kernel/cpuidle.c2
-rw-r--r--arch/arm64/kernel/probes/kprobes.c6
-rw-r--r--arch/arm64/kvm/handle_exit.c19
-rw-r--r--arch/arm64/kvm/hyp/tlb.c64
-rw-r--r--arch/arm64/mm/kasan_init.c2
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/include/asm/cpu-features.h10
-rw-r--r--arch/mips/include/asm/cpu-info.h2
-rw-r--r--arch/mips/include/asm/cpu.h1
-rw-r--r--arch/mips/include/asm/kvm_host.h468
-rw-r--r--arch/mips/include/asm/maar.h10
-rw-r--r--arch/mips/include/asm/mipsregs.h62
-rw-r--r--arch/mips/include/asm/tlb.h6
-rw-r--r--arch/mips/include/uapi/asm/inst.h2
-rw-r--r--arch/mips/include/uapi/asm/kvm.h22
-rw-r--r--arch/mips/kernel/cpu-probe.c13
-rw-r--r--arch/mips/kernel/time.c1
-rw-r--r--arch/mips/kvm/Kconfig27
-rw-r--r--arch/mips/kvm/Makefile9
-rw-r--r--arch/mips/kvm/emulate.c500
-rw-r--r--arch/mips/kvm/entry.c132
-rw-r--r--arch/mips/kvm/hypcall.c53
-rw-r--r--arch/mips/kvm/interrupt.h5
-rw-r--r--arch/mips/kvm/mips.c123
-rw-r--r--arch/mips/kvm/mmu.c20
-rw-r--r--arch/mips/kvm/tlb.c441
-rw-r--r--arch/mips/kvm/trace.h74
-rw-r--r--arch/mips/kvm/trap_emul.c73
-rw-r--r--arch/mips/kvm/vz.c3223
-rw-r--r--arch/mips/mm/cache.c1
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/openrisc/include/asm/cmpxchg.h8
-rw-r--r--arch/openrisc/include/asm/uaccess.h2
-rw-r--r--arch/openrisc/kernel/or32_ksyms.c4
-rw-r--r--arch/openrisc/kernel/process.c1
-rw-r--r--arch/parisc/include/asm/cacheflush.h23
-rw-r--r--arch/parisc/include/asm/uaccess.h3
-rw-r--r--arch/parisc/include/uapi/asm/unistd.h3
-rw-r--r--arch/parisc/kernel/cache.c22
-rw-r--r--arch/parisc/kernel/module.c8
-rw-r--r--arch/parisc/kernel/perf.c94
-rw-r--r--arch/parisc/kernel/process.c2
-rw-r--r--arch/parisc/kernel/syscall_table.S1
-rw-r--r--arch/powerpc/boot/zImage.lds.S1
-rw-r--r--arch/powerpc/crypto/crc32c-vpmsum_glue.c2
-rw-r--r--arch/powerpc/include/asm/bitops.h4
-rw-r--r--arch/powerpc/include/asm/kvm_host.h3
-rw-r--r--arch/powerpc/include/asm/mce.h108
-rw-r--r--arch/powerpc/include/asm/systbl.h1
-rw-r--r--arch/powerpc/include/asm/unistd.h2
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h3
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/cputable.c3
-rw-r--r--arch/powerpc/kernel/mce.c88
-rw-r--r--arch/powerpc/kernel/mce_power.c237
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/powerpc/perf/core-book3s.c2
-rw-r--r--arch/powerpc/perf/isa207-common.c43
-rw-r--r--arch/powerpc/perf/isa207-common.h1
-rw-r--r--arch/powerpc/platforms/powernv/opal.c21
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c20
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c4
-rw-r--r--arch/powerpc/purgatory/trampoline.S12
-rw-r--r--arch/s390/crypto/paes_s390.c5
-rw-r--r--arch/s390/include/asm/cputime.h20
-rw-r--r--arch/s390/include/asm/elf.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h40
-rw-r--r--arch/s390/include/asm/lowcore.h9
-rw-r--r--arch/s390/include/asm/nmi.h12
-rw-r--r--arch/s390/include/asm/processor.h5
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/switch_to.h3
-rw-r--r--arch/s390/include/asm/thread_info.h12
-rw-r--r--arch/s390/include/asm/timex.h12
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/guarded_storage.h77
-rw-r--r--arch/s390/include/uapi/asm/kvm.h25
-rw-r--r--arch/s390/include/uapi/asm/unistd.h4
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/compat_wrapper.c2
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/entry.S36
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/guarded_storage.c128
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c13
-rw-r--r--arch/s390/kernel/nmi.c19
-rw-r--r--arch/s390/kernel/process.c10
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/ptrace.c86
-rw-r--r--arch/s390/kernel/setup.c18
-rw-r--r--arch/s390/kernel/smp.c43
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kernel/vtime.c2
-rw-r--r--arch/s390/kvm/gaccess.c6
-rw-r--r--arch/s390/kvm/intercept.c24
-rw-r--r--arch/s390/kvm/interrupt.c135
-rw-r--r--arch/s390/kvm/kvm-s390.c127
-rw-r--r--arch/s390/kvm/kvm-s390.h3
-rw-r--r--arch/s390/kvm/priv.c31
-rw-r--r--arch/s390/kvm/sthyi.c3
-rw-r--r--arch/s390/kvm/trace-s390.h52
-rw-r--r--arch/s390/kvm/vsie.c72
-rw-r--r--arch/s390/mm/pgtable.c19
-rw-r--r--arch/score/kernel/traps.c1
-rw-r--r--arch/score/mm/extable.c2
-rw-r--r--arch/x86/events/core.c16
-rw-r--r--arch/x86/include/asm/kvm_host.h8
-rw-r--r--arch/x86/include/asm/kvm_page_track.h1
-rw-r--r--arch/x86/include/asm/pgtable-3level.h3
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/purgatory.h20
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/include/uapi/asm/vmx.h25
-rw-r--r--arch/x86/kernel/acpi/boot.c9
-rw-r--r--arch/x86/kernel/apic/apic.c26
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c2
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c9
-rw-r--r--arch/x86/kernel/nmi.c6
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/unwind_frame.c36
-rw-r--r--arch/x86/kvm/Kconfig12
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/assigned-dev.c1058
-rw-r--r--arch/x86/kvm/assigned-dev.h32
-rw-r--r--arch/x86/kvm/i8259.c75
-rw-r--r--arch/x86/kvm/ioapic.c31
-rw-r--r--arch/x86/kvm/ioapic.h16
-rw-r--r--arch/x86/kvm/iommu.c356
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/irq.h32
-rw-r--r--arch/x86/kvm/irq_comm.c45
-rw-r--r--arch/x86/kvm/mmu.c4
-rw-r--r--arch/x86/kvm/mmu.h3
-rw-r--r--arch/x86/kvm/page_track.c8
-rw-r--r--arch/x86/kvm/paging_tmpl.h54
-rw-r--r--arch/x86/kvm/svm.c10
-rw-r--r--arch/x86/kvm/vmx.c334
-rw-r--r--arch/x86/kvm/x86.c147
-rw-r--r--arch/x86/mm/kasan_init_64.c1
-rw-r--r--arch/x86/mm/mpx.c2
-rw-r--r--arch/x86/platform/intel-mid/device_libs/Makefile1
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c82
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c2
-rw-r--r--arch/x86/platform/intel-mid/mfld.c15
-rw-r--r--arch/x86/purgatory/purgatory.c35
-rw-r--r--arch/x86/purgatory/purgatory.h8
-rw-r--r--arch/x86/purgatory/setup-x86_64.S2
-rw-r--r--arch/x86/purgatory/sha256.h1
-rw-r--r--block/bio.c12
-rw-r--r--block/blk-core.c30
-rw-r--r--block/blk-mq-tag.c3
-rw-r--r--block/blk-mq.c9
-rw-r--r--crypto/af_alg.c9
-rw-r--r--crypto/algif_hash.c9
-rw-r--r--drivers/acpi/acpi_processor.c57
-rw-r--r--drivers/acpi/bus.c1
-rw-r--r--drivers/acpi/processor_core.c133
-rw-r--r--drivers/ata/ahci_qoriq.c6
-rw-r--r--drivers/ata/libata-sff.c1
-rw-r--r--drivers/ata/libata-transport.c9
-rw-r--r--drivers/base/core.c5
-rw-r--r--drivers/char/hw_random/omap-rng.c16
-rw-r--r--drivers/char/random.c129
-rw-r--r--drivers/clocksource/tcb_clksrc.c16
-rw-r--r--drivers/cpufreq/cpufreq.c8
-rw-r--r--drivers/cpufreq/intel_pstate.c64
-rw-r--r--drivers/crypto/s5p-sss.c132
-rw-r--r--drivers/dax/dax.c33
-rw-r--r--drivers/gpio/gpio-altera-a10sr.c2
-rw-r--r--drivers/gpio/gpio-altera.c26
-rw-r--r--drivers/gpio/gpio-mcp23s08.c65
-rw-r--r--drivers/gpio/gpio-mockup.c7
-rw-r--r--drivers/gpio/gpio-xgene.c13
-rw-r--r--drivers/gpu/drm/amd/acp/Makefile2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c2
-rw-r--r--drivers/gpu/drm/arm/malidp_crtc.c3
-rw-r--r--drivers/gpu/drm/arm/malidp_hw.c2
-rw-r--r--drivers/gpu/drm/arm/malidp_planes.c18
-rw-r--r--drivers/gpu/drm/arm/malidp_regs.h1
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c97
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c8
-rw-r--r--drivers/gpu/drm/i915/i915_gem_object.h3
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c57
-rw-r--r--drivers/gpu/drm/i915/intel_display.c58
-rw-r--r--drivers/gpu/drm/i915/intel_fbdev.c10
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c18
-rw-r--r--drivers/gpu/drm/i915/intel_sprite.c3
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c13
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c3
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c6
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_crtc.c37
-rw-r--r--drivers/hid/Kconfig5
-rw-r--r--drivers/hid/hid-chicony.c1
-rw-r--r--drivers/hid/hid-core.c2
-rw-r--r--drivers/hid/hid-corsair.c47
-rw-r--r--drivers/hid/hid-ids.h4
-rw-r--r--drivers/hid/hid-sony.c2
-rw-r--r--drivers/hid/usbhid/hid-quirks.c3
-rw-r--r--drivers/hid/wacom_sys.c4
-rw-r--r--drivers/hid/wacom_wac.c10
-rw-r--r--drivers/irqchip/irq-crossbar.c8
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c16
-rw-r--r--drivers/isdn/gigaset/bas-gigaset.c3
-rw-r--r--drivers/macintosh/macio_asic.c1
-rw-r--r--drivers/md/dm.c29
-rw-r--r--drivers/md/md-cluster.c2
-rw-r--r--drivers/md/md.c27
-rw-r--r--drivers/md/md.h6
-rw-r--r--drivers/md/raid1.c29
-rw-r--r--drivers/md/raid10.c44
-rw-r--r--drivers/md/raid5.c5
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c10
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c36
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c24
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c40
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c25
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c206
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h16
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c110
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c104
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_config.h6
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.c17
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.h4
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_main.h42
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h43
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic.h1
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c12
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c184
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h4
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c64
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.h1
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c25
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c43
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iscsi.c31
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c13
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ooo.c2
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c47
-rw-r--r--drivers/net/hyperv/hyperv_net.h3
-rw-r--r--drivers/net/hyperv/netvsc.c8
-rw-r--r--drivers/net/hyperv/netvsc_drv.c11
-rw-r--r--drivers/net/phy/marvell.c15
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/phy/spi_ks8995.c3
-rw-r--r--drivers/net/team/team.c1
-rw-r--r--drivers/net/tun.c19
-rw-r--r--drivers/net/vrf.c3
-rw-r--r--drivers/net/vxlan.c73
-rw-r--r--drivers/net/wan/fsl_ucc_hdlc.c4
-rw-r--r--drivers/net/wimax/i2400m/usb.c3
-rw-r--r--drivers/net/xen-netback/interface.c26
-rw-r--r--drivers/net/xen-netback/netback.c2
-rw-r--r--drivers/net/xen-netback/xenbus.c20
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c49
-rw-r--r--drivers/platform/x86/asus-wmi.c22
-rw-r--r--drivers/platform/x86/asus-wmi.h1
-rw-r--r--drivers/platform/x86/fujitsu-laptop.c451
-rw-r--r--drivers/ptp/ptp_kvm.c5
-rw-r--r--drivers/remoteproc/Kconfig6
-rw-r--r--drivers/scsi/Kconfig5
-rw-r--r--drivers/scsi/aacraid/src.c2
-rw-r--r--drivers/scsi/hpsa.c53
-rw-r--r--drivers/scsi/hpsa.h1
-rw-r--r--drivers/scsi/hpsa_cmd.h2
-rw-r--r--drivers/scsi/libiscsi.c26
-rw-r--r--drivers/scsi/lpfc/lpfc.h4
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c13
-rw-r--r--drivers/scsi/lpfc/lpfc_crtn.h4
-rw-r--r--drivers/scsi/lpfc/lpfc_ct.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c22
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c22
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c24
-rw-r--r--drivers/scsi/lpfc/lpfc_hw4.h4
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c135
-rw-r--r--drivers/scsi/lpfc/lpfc_mem.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c107
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.h1
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c43
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c68
-rw-r--r--drivers/scsi/lpfc/lpfc_sli4.h6
-rw-r--r--drivers/scsi/lpfc/lpfc_version.h2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas.h4
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c17
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c4
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h3
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c19
-rw-r--r--drivers/scsi/qedf/qedf_dbg.h13
-rw-r--r--drivers/scsi/qedf/qedf_fip.c2
-rw-r--r--drivers/scsi/qedf/qedf_io.c4
-rw-r--r--drivers/scsi/qedf/qedf_main.c4
-rw-r--r--drivers/scsi/qedi/qedi_debugfs.c16
-rw-r--r--drivers/scsi/qedi/qedi_fw.c4
-rw-r--r--drivers/scsi/qedi/qedi_gbl.h8
-rw-r--r--drivers/scsi/qedi/qedi_iscsi.c8
-rw-r--r--drivers/scsi/qedi/qedi_main.c2
-rw-r--r--drivers/scsi/qla2xxx/Kconfig1
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_dbg.c12
-rw-r--r--drivers/scsi/qla2xxx/qla_dbg.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h56
-rw-r--r--drivers/scsi/qla2xxx/qla_dfs.c107
-rw-r--r--drivers/scsi/qla2xxx/qla_gbl.h18
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c85
-rw-r--r--drivers/scsi/qla2xxx/qla_iocb.c13
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c41
-rw-r--r--drivers/scsi/qla2xxx/qla_mbx.c304
-rw-r--r--drivers/scsi/qla2xxx/qla_mid.c14
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c23
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c748
-rw-r--r--drivers/scsi/qla2xxx/qla_target.h39
-rw-r--r--drivers/scsi/qla2xxx/qla_version.h6
-rw-r--r--drivers/scsi/qla2xxx/tcm_qla2xxx.c49
-rw-r--r--drivers/scsi/scsi_lib.c14
-rw-r--r--drivers/scsi/scsi_priv.h3
-rw-r--r--drivers/scsi/sd.c17
-rw-r--r--drivers/scsi/storvsc_drv.c27
-rw-r--r--drivers/scsi/ufs/ufs.h22
-rw-r--r--drivers/scsi/ufs/ufshcd.c233
-rw-r--r--drivers/scsi/ufs/ufshcd.h15
-rw-r--r--drivers/scsi/vmw_pvscsi.c2
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c4
-rw-r--r--drivers/target/target_core_alua.c82
-rw-r--r--drivers/target/target_core_configfs.c4
-rw-r--r--drivers/target/target_core_pscsi.c50
-rw-r--r--drivers/target/target_core_sbc.c10
-rw-r--r--drivers/target/target_core_tpg.c3
-rw-r--r--drivers/target/target_core_transport.c3
-rw-r--r--drivers/target/target_core_user.c152
-rw-r--r--drivers/tty/serial/st-asc.c11
-rw-r--r--drivers/xen/gntdev.c11
-rw-r--r--fs/afs/callback.c7
-rw-r--r--fs/afs/cmservice.c11
-rw-r--r--fs/afs/file.c20
-rw-r--r--fs/afs/fsclient.c77
-rw-r--r--fs/afs/inode.c42
-rw-r--r--fs/afs/internal.h23
-rw-r--r--fs/afs/misc.c2
-rw-r--r--fs/afs/mntpt.c53
-rw-r--r--fs/afs/rxrpc.c149
-rw-r--r--fs/afs/security.c9
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/vlocation.c16
-rw-r--r--fs/afs/write.c76
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/f2fs/debug.c1
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/node.c163
-rw-r--r--fs/f2fs/segment.c6
-rw-r--r--fs/fs-writeback.c35
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/client.c25
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h14
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c5
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4client.c4
-rw-r--r--fs/nfs/nfs4proc.c11
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/pnfs_nfs.c31
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c87
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c26
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c11
-rw-r--r--fs/xfs/xfs_inode.c12
-rw-r--r--include/crypto/if_alg.h2
-rw-r--r--include/linux/acpi.h5
-rw-r--r--include/linux/dccp.h1
-rw-r--r--include/linux/device.h1
-rw-r--r--include/linux/filter.h16
-rw-r--r--include/linux/gpio/consumer.h16
-rw-r--r--include/linux/irqchip/arm-gic-v3.h2
-rw-r--r--include/linux/irqdomain.h4
-rw-r--r--include/linux/kasan.h1
-rw-r--r--include/linux/kvm_host.h26
-rw-r--r--include/linux/list_nulls.h5
-rw-r--r--include/linux/net.h2
-rw-r--r--include/linux/phy.h4
-rw-r--r--include/linux/purgatory.h23
-rw-r--r--include/linux/random.h18
-rw-r--r--include/linux/rculist_nulls.h14
-rw-r--r--include/net/inet_common.h3
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/sctp/structs.h3
-rw-r--r--include/net/sock.h9
-rw-r--r--include/scsi/libiscsi.h1
-rw-r--r--include/scsi/scsi_device.h4
-rw-r--r--include/target/target_core_backend.h7
-rw-r--r--include/target/target_core_base.h2
-rw-r--r--include/uapi/drm/omap_drm.h38
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--include/uapi/linux/kvm.h9
-rw-r--r--include/uapi/linux/packet_diag.h2
-rw-r--r--init/main.c1
-rw-r--r--kernel/bpf/hashtab.c119
-rw-r--r--kernel/bpf/lpm_trie.c6
-rw-r--r--kernel/cgroup/cgroup-v1.c2
-rw-r--r--kernel/cgroup/pids.c2
-rw-r--r--kernel/cpu.c28
-rw-r--r--kernel/events/core.c64
-rw-r--r--kernel/futex.c22
-rw-r--r--kernel/kexec_file.c8
-rw-r--r--kernel/kexec_internal.h6
-rw-r--r--kernel/locking/rwsem-spinlock.c16
-rw-r--r--kernel/memremap.c4
-rw-r--r--kernel/sched/deadline.c63
-rw-r--r--kernel/sched/loadavg.c20
-rw-r--r--kernel/workqueue.c1
-rw-r--r--mm/gup.c2
-rw-r--r--mm/memory_hotplug.c6
-rw-r--r--mm/percpu-vm.c7
-rw-r--r--mm/percpu.c5
-rw-r--r--mm/swap_slots.c2
-rw-r--r--mm/vmalloc.c3
-rw-r--r--mm/z3fold.c1
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c3
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bridge/br_input.c1
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/core/dev.c1
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--net/core/skbuff.c30
-rw-r--r--net/core/sock.c106
-rw-r--r--net/dccp/ccids/ccid2.c1
-rw-r--r--net/dccp/ipv4.c3
-rw-r--r--net/dccp/ipv6.c8
-rw-r--r--net/dccp/minisocks.c24
-rw-r--r--net/decnet/af_decnet.c5
-rw-r--r--net/ipv4/af_inet.c9
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/tcp_ipv4.c10
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv6/af_inet6.c10
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_offload.c4
-rw-r--r--net/ipv6/ip6_output.c9
-rw-r--r--net/ipv6/ip6_vti.c8
-rw-r--r--net/ipv6/route.c11
-rw-r--r--net/ipv6/tcp_ipv6.c8
-rw-r--r--net/irda/af_irda.c5
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/netrom/af_netrom.c3
-rw-r--r--net/nfc/llcp_sock.c2
-rw-r--r--net/phonet/pep.c6
-rw-r--r--net/phonet/socket.c4
-rw-r--r--net/rds/connection.c1
-rw-r--r--net/rds/ib_cm.c47
-rw-r--r--net/rds/rds.h6
-rw-r--r--net/rds/tcp.c38
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_listen.c11
-rw-r--r--net/rose/af_rose.c3
-rw-r--r--net/rxrpc/input.c27
-rw-r--r--net/rxrpc/recvmsg.c4
-rw-r--r--net/rxrpc/sendmsg.c49
-rw-r--r--net/sched/act_connmark.c3
-rw-r--r--net/sched/act_skbmod.c1
-rw-r--r--net/sctp/ipv6.c5
-rw-r--r--net/sctp/protocol.c5
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/socket.c5
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/tipc/socket.c8
-rw-r--r--net/unix/af_unix.c5
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/x25/af_x25.c3
-rw-r--r--net/xfrm/xfrm_policy.c19
-rw-r--r--tools/include/uapi/linux/bpf_perf_event.h18
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat381
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt26
-rw-r--r--tools/perf/util/symbol.c2
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c4
-rw-r--r--tools/testing/selftests/powerpc/include/vsx_asm.h48
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c109
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c32
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c5
-rw-r--r--virt/kvm/eventfd.c7
-rw-r--r--virt/kvm/irqchip.c11
-rw-r--r--virt/kvm/kvm_main.c74
537 files changed, 13220 insertions, 6102 deletions
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index a71b8095dbd8..2f66683500b8 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -68,3 +68,4 @@ stable kernels.
| | | | |
| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
+| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 3b8449f8ac7e..49d7c997fa1e 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1142,16 +1142,17 @@ used by the kernel.
pids.max
- A read-write single value file which exists on non-root cgroups. The
- default is "max".
+ A read-write single value file which exists on non-root
+ cgroups. The default is "max".
- Hard limit of number of processes.
+ Hard limit of number of processes.
pids.current
- A read-only single value file which exists on all cgroups.
+ A read-only single value file which exists on all cgroups.
- The number of processes currently in the cgroup and its descendants.
+ The number of processes currently in the cgroup and its
+ descendants.
Organisational operations are not blocked by cgroup policies, so it is
possible to have pids.current > pids.max. This can be done by either
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
index 712baf6c3e24..44b842b6ca15 100644
--- a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
+++ b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
@@ -71,6 +71,9 @@
For Axon it can be absent, though my current driver
doesn't handle phy-address yet so for now, keep
0x00ffffff in it.
+ - phy-handle : Used to describe configurations where an external PHY
+ is used. Please refer to:
+ Documentation/devicetree/bindings/net/ethernet.txt
- rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
operations (if absent the value is the same as
rx-fifo-size). For Axon, either absent or 2048.
@@ -81,8 +84,22 @@
offload, phandle of the TAH device node.
- tah-channel : 1 cell, optional. If appropriate, channel used on the
TAH engine.
+ - fixed-link : Fixed-link subnode describing a link to a non-MDIO
+ managed entity. See
+ Documentation/devicetree/bindings/net/fixed-link.txt
+ for details.
+ - mdio subnode : When the EMAC has a phy connected to its local
+ mdio, which is supported by the kernel's network
+ PHY library in drivers/net/phy, there must be a device
+ tree subnode with the following required properties:
+ - #address-cells: Must be <1>.
+ - #size-cells: Must be <0>.
- Example:
+ For PHY definitions: Please refer to
+ Documentation/devicetree/bindings/net/phy.txt and
+ Documentation/devicetree/bindings/net/ethernet.txt
+
+ Examples:
EMAC0: ethernet@40000800 {
device_type = "network";
@@ -104,6 +121,48 @@
zmii-channel = <0>;
};
+ EMAC1: ethernet@ef600c00 {
+ device_type = "network";
+ compatible = "ibm,emac-apm821xx", "ibm,emac4sync";
+ interrupt-parent = <&EMAC1>;
+ interrupts = <0 1>;
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */
+ 1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>;
+ reg = <0xef600c00 0x000000c4>;
+ local-mac-address = [000000000000]; /* Filled in by U-Boot */
+ mal-device = <&MAL0>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <0>;
+ max-frame-size = <9000>;
+ rx-fifo-size = <16384>;
+ tx-fifo-size = <2048>;
+ fifo-entry-size = <10>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy0>;
+ phy-map = <0x00000000>;
+ rgmii-device = <&RGMII0>;
+ rgmii-channel = <0>;
+ tah-device = <&TAH0>;
+ tah-channel = <0>;
+ has-inverted-stacr-oc;
+ has-new-stacr-staopc;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ };
+ };
+ };
+
+
ii) McMAL node
Required properties:
@@ -145,4 +204,3 @@
- revision : as provided by the RGMII new version register if
available.
For Axon: 0x0000012a
-
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fc73eeb7b3b8..ab0230461377 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1006,7 +1006,8 @@ accept_redirects - BOOLEAN
FALSE (router)
forwarding - BOOLEAN
- Enable IP forwarding on this interface.
+ Enable IP forwarding on this interface. This controls whether packets
+ received _on_ this interface can be forwarded.
mc_forwarding - BOOLEAN
Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 069450938b79..e60be91d8036 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -115,12 +115,17 @@ will access the virtual machine's physical address space; offset zero
corresponds to guest physical address zero. Use of mmap() on a VM fd
is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
available.
-You most certainly want to use 0 as machine type.
+You probably want to use 0 as machine type.
In order to create user controlled virtual machines on S390, check
KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).
+To use hardware assisted virtualization on MIPS (VZ ASE) rather than
+the default trap & emulate implementation (which changes the virtual
+memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
+flag KVM_VM_MIPS_VZ.
+
4.3 KVM_GET_MSR_INDEX_LIST
@@ -951,6 +956,10 @@ This ioctl allows the user to create or modify a guest physical memory
slot. When changing an existing slot, it may be moved in the guest
physical memory space, or its flags may be modified. It may not be
resized. Slots may not overlap in guest physical address space.
+Bits 0-15 of "slot" specify the slot id and this value should be
+less than the maximum number of user memory slots supported per VM.
+The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
specifies the address space which is being modified. They must be
@@ -1317,130 +1326,6 @@ The flags bitmap is defined as:
/* the host supports the ePAPR idle hcall
#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
-4.48 KVM_ASSIGN_PCI_DEVICE (deprecated)
-
-Capability: none
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_pci_dev (in)
-Returns: 0 on success, -1 on error
-
-Assigns a host PCI device to the VM.
-
-struct kvm_assigned_pci_dev {
- __u32 assigned_dev_id;
- __u32 busnr;
- __u32 devfn;
- __u32 flags;
- __u32 segnr;
- union {
- __u32 reserved[11];
- };
-};
-
-The PCI device is specified by the triple segnr, busnr, and devfn.
-Identification in succeeding service requests is done via assigned_dev_id. The
-following flags are specified:
-
-/* Depends on KVM_CAP_IOMMU */
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-/* The following two depend on KVM_CAP_PCI_2_3 */
-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
-
-If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts
-via the PCI-2.3-compliant device-level mask, thus enable IRQ sharing with other
-assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the
-guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details.
-
-The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
-isolation of the device. Usages not specifying this flag are deprecated.
-
-Only PCI header type 0 devices with PCI BAR resources are supported by
-device assignment. The user requesting this ioctl must have read/write
-access to the PCI sysfs resource files associated with the device.
-
-Errors:
- ENOTTY: kernel does not support this ioctl
-
- Other error conditions may be defined by individual device types or
- have their standard meanings.
-
-
-4.49 KVM_DEASSIGN_PCI_DEVICE (deprecated)
-
-Capability: none
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_pci_dev (in)
-Returns: 0 on success, -1 on error
-
-Ends PCI device assignment, releasing all associated resources.
-
-See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
-used in kvm_assigned_pci_dev to identify the device.
-
-Errors:
- ENOTTY: kernel does not support this ioctl
-
- Other error conditions may be defined by individual device types or
- have their standard meanings.
-
-4.50 KVM_ASSIGN_DEV_IRQ (deprecated)
-
-Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_irq (in)
-Returns: 0 on success, -1 on error
-
-Assigns an IRQ to a passed-through device.
-
-struct kvm_assigned_irq {
- __u32 assigned_dev_id;
- __u32 host_irq; /* ignored (legacy field) */
- __u32 guest_irq;
- __u32 flags;
- union {
- __u32 reserved[12];
- };
-};
-
-The following flags are defined:
-
-#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
-
-It is not valid to specify multiple types per host or guest IRQ. However, the
-IRQ type of host and guest can differ or can even be null.
-
-Errors:
- ENOTTY: kernel does not support this ioctl
-
- Other error conditions may be defined by individual device types or
- have their standard meanings.
-
-
-4.51 KVM_DEASSIGN_DEV_IRQ (deprecated)
-
-Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_irq (in)
-Returns: 0 on success, -1 on error
-
-Ends an IRQ assignment to a passed-through device.
-
-See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
-by assigned_dev_id, flags must correspond to the IRQ type specified on
-KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
-
-
4.52 KVM_SET_GSI_ROUTING
Capability: KVM_CAP_IRQ_ROUTING
@@ -1527,52 +1412,6 @@ struct kvm_irq_routing_hv_sint {
__u32 sint;
};
-4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
-
-Capability: none
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_msix_nr (in)
-Returns: 0 on success, -1 on error
-
-Set the number of MSI-X interrupts for an assigned device. The number is
-reset again by terminating the MSI-X assignment of the device via
-KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier
-point will fail.
-
-struct kvm_assigned_msix_nr {
- __u32 assigned_dev_id;
- __u16 entry_nr;
- __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV 256
-
-
-4.54 KVM_ASSIGN_SET_MSIX_ENTRY (deprecated)
-
-Capability: none
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_msix_entry (in)
-Returns: 0 on success, -1 on error
-
-Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting
-the GSI vector to zero means disabling the interrupt.
-
-struct kvm_assigned_msix_entry {
- __u32 assigned_dev_id;
- __u32 gsi;
- __u16 entry; /* The index of entry in the MSI-X table */
- __u16 padding[3];
-};
-
-Errors:
- ENOTTY: kernel does not support this ioctl
-
- Other error conditions may be defined by individual device types or
- have their standard meanings.
-
4.55 KVM_SET_TSC_KHZ
@@ -1724,40 +1563,6 @@ should skip processing the bitmap and just invalidate everything. It must
be set to the number of set bits in the bitmap.
-4.61 KVM_ASSIGN_SET_INTX_MASK (deprecated)
-
-Capability: KVM_CAP_PCI_2_3
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_assigned_pci_dev (in)
-Returns: 0 on success, -1 on error
-
-Allows userspace to mask PCI INTx interrupts from the assigned device. The
-kernel will not deliver INTx interrupts to the guest between setting and
-clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of
-and emulation of PCI 2.3 INTx disable command register behavior.
-
-This may be used for both PCI 2.3 devices supporting INTx disable natively and
-older devices lacking this support. Userspace is responsible for emulating the
-read value of the INTx disable bit in the guest visible PCI command register.
-When modifying the INTx disable state, userspace should precede updating the
-physical device command register by calling this ioctl to inform the kernel of
-the new intended INTx mask state.
-
-Note that the kernel uses the device INTx disable bit to internally manage the
-device interrupt state for PCI 2.3 devices. Reads of this register may
-therefore not match the expected value. Writes should always use the guest
-intended INTx disable value rather than attempting to read-copy-update the
-current physical device state. Races between user and kernel updates to the
-INTx disable bit are handled lazily in the kernel. It's possible the device
-may generate unintended interrupts, but they will not be injected into the
-guest.
-
-See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
-by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
-evaluated.
-
-
4.62 KVM_CREATE_SPAPR_TCE
Capability: KVM_CAP_SPAPR_TCE
@@ -2064,11 +1869,23 @@ registers, find a list below:
MIPS | KVM_REG_MIPS_CP0_ENTRYLO0 | 64
MIPS | KVM_REG_MIPS_CP0_ENTRYLO1 | 64
MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64
+ MIPS | KVM_REG_MIPS_CP0_CONTEXTCONFIG| 32
MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64
+ MIPS | KVM_REG_MIPS_CP0_XCONTEXTCONFIG| 64
MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32
+ MIPS | KVM_REG_MIPS_CP0_PAGEGRAIN | 32
+ MIPS | KVM_REG_MIPS_CP0_SEGCTL0 | 64
+ MIPS | KVM_REG_MIPS_CP0_SEGCTL1 | 64
+ MIPS | KVM_REG_MIPS_CP0_SEGCTL2 | 64
+ MIPS | KVM_REG_MIPS_CP0_PWBASE | 64
+ MIPS | KVM_REG_MIPS_CP0_PWFIELD | 64
+ MIPS | KVM_REG_MIPS_CP0_PWSIZE | 64
MIPS | KVM_REG_MIPS_CP0_WIRED | 32
+ MIPS | KVM_REG_MIPS_CP0_PWCTL | 32
MIPS | KVM_REG_MIPS_CP0_HWRENA | 32
MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64
+ MIPS | KVM_REG_MIPS_CP0_BADINSTR | 32
+ MIPS | KVM_REG_MIPS_CP0_BADINSTRP | 32
MIPS | KVM_REG_MIPS_CP0_COUNT | 32
MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64
MIPS | KVM_REG_MIPS_CP0_COMPARE | 32
@@ -2085,6 +1902,7 @@ registers, find a list below:
MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32
MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32
MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32
+ MIPS | KVM_REG_MIPS_CP0_XCONTEXT | 64
MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64
MIPS | KVM_REG_MIPS_CP0_KSCRATCH1 | 64
MIPS | KVM_REG_MIPS_CP0_KSCRATCH2 | 64
@@ -2092,6 +1910,7 @@ registers, find a list below:
MIPS | KVM_REG_MIPS_CP0_KSCRATCH4 | 64
MIPS | KVM_REG_MIPS_CP0_KSCRATCH5 | 64
MIPS | KVM_REG_MIPS_CP0_KSCRATCH6 | 64
+ MIPS | KVM_REG_MIPS_CP0_MAAR(0..63) | 64
MIPS | KVM_REG_MIPS_COUNT_CTL | 64
MIPS | KVM_REG_MIPS_COUNT_RESUME | 64
MIPS | KVM_REG_MIPS_COUNT_HZ | 64
@@ -2158,6 +1977,10 @@ hardware, host kernel, guest, and whether XPA is present in the guest, i.e.
with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and
the PFNX field starting at bit 30.
+MIPS MAARs (see KVM_REG_MIPS_CP0_MAAR(*) above) have the following id bit
+patterns:
+ 0x7030 0000 0001 01 <reg:8>
+
MIPS KVM control registers (see above) have the following id bit patterns:
0x7030 0000 0002 <reg:16>
@@ -3373,6 +3196,69 @@ struct kvm_ppc_resize_hpt {
__u32 pad;
};
+4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: system ioctl
+Parameters: u64 mce_cap (out)
+Returns: 0 on success, -1 on error
+
+Returns supported MCE capabilities. The u64 mce_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register. Supported
+capabilities will have the corresponding bits set.
+
+4.105 KVM_X86_SETUP_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: u64 mcg_cap (in)
+Returns: 0 on success,
+ -EFAULT if u64 mcg_cap cannot be read,
+ -EINVAL if the requested number of banks is invalid,
+ -EINVAL if requested MCE capability is not supported.
+
+Initializes MCE support for use. The u64 mcg_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register and
+specifies which capabilities should be enabled. The maximum
+supported number of error-reporting banks can be retrieved when
+checking for KVM_CAP_MCE. The supported capabilities can be
+retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED.
+
+4.106 KVM_X86_SET_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_x86_mce (in)
+Returns: 0 on success,
+ -EFAULT if struct kvm_x86_mce cannot be read,
+ -EINVAL if the bank number is invalid,
+ -EINVAL if VAL bit is not set in status field.
+
+Inject a machine check error (MCE) into the guest. The input
+parameter is:
+
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+
+If the MCE being reported is an uncorrected error, KVM will
+inject it as an MCE exception into the guest. If the guest
+MCG_STATUS register reports that an MCE is in progress, KVM
+causes a KVM_EXIT_SHUTDOWN vmexit.
+
+Otherwise, if the MCE is a corrected error, KVM will just
+store it in the corresponding bank (provided this bank is
+not holding a previously reported uncorrected error).
+
5. The kvm_run structure
------------------------
@@ -4097,6 +3983,23 @@ to take care of that.
This capability can be enabled dynamically even if VCPUs were already
created and are running.
+7.9 KVM_CAP_S390_GS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success; -EINVAL if the machine does not support
+ guarded storage; -EBUSY if a VCPU has already been created.
+
+Allows use of guarded storage for the KVM guest.
+
+7.10 KVM_CAP_S390_AIS
+
+Architectures: s390
+Parameters: none
+
+Allows use of adapter-interruption suppression.
+Returns: 0 on success; -EBUSY if a VCPU has already been created.
+
8. Other capabilities.
----------------------
@@ -4143,3 +4046,68 @@ This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel can support guests using the
hashed page table MMU defined in Power ISA V3.00 (as implemented in
the POWER9 processor), including in-memory segment tables.
+
+8.5 KVM_CAP_MIPS_VZ
+
+Architectures: mips
+
+This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
+it is available, means that full hardware assisted virtualization capabilities
+of the hardware are available for use through KVM. An appropriate
+KVM_VM_MIPS_* type must be passed to KVM_CREATE_VM to create a VM which
+utilises it.
+
+If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
+available, it means that the VM is using full hardware assisted virtualization
+capabilities of the hardware. This is useful to check after creating a VM with
+KVM_VM_MIPS_DEFAULT.
+
+The value returned by KVM_CHECK_EXTENSION should be compared against known
+values (see below). All other values are reserved. This is to allow for the
+possibility of other hardware assisted virtualization implementations which
+may be incompatible with the MIPS VZ ASE.
+
+ 0: The trap & emulate implementation is in use to run guest code in user
+ mode. Guest virtual memory segments are rearranged to fit the guest in the
+ user mode address space.
+
+ 1: The MIPS VZ ASE is in use, providing full hardware assisted
+ virtualization, including standard guest virtual memory segments.
+
+8.6 KVM_CAP_MIPS_TE
+
+Architectures: mips
+
+This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
+it is available, means that the trap & emulate implementation is available to
+run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware
+assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed
+to KVM_CREATE_VM to create a VM which utilises it.
+
+If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
+available, it means that the VM is using trap & emulate.
+
+8.7 KVM_CAP_MIPS_64BIT
+
+Architectures: mips
+
+This capability indicates the supported architecture type of the guest, i.e. the
+supported register and address width.
+
+The values returned when this capability is checked by KVM_CHECK_EXTENSION on a
+kvm VM handle correspond roughly to the CP0_Config.AT register field, and should
+be checked specifically against known values (see below). All other values are
+reserved.
+
+ 0: MIPS32 or microMIPS32.
+ Both registers and addresses are 32-bits wide.
+ It will only be possible to run 32-bit guest code.
+
+ 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments.
+ Registers are 64-bits wide, but addresses are 32-bits wide.
+ 64-bit guest code may run but cannot access MIPS64 memory segments.
+ It will also be possible to run 32-bit guest code.
+
+ 2: MIPS64 or microMIPS64 with access to all address segments.
+ Both registers and addresses are 64-bits wide.
+ It will be possible to run 64-bit or 32-bit guest code.
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 6b0e115301c8..c2518cea8ab4 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -14,6 +14,8 @@ FLIC provides support to
- purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
- enable/disable for the guest transparent async page faults
- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
+- modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM)
+- inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT)
Groups:
KVM_DEV_FLIC_ENQUEUE
@@ -64,12 +66,18 @@ struct kvm_s390_io_adapter {
__u8 isc;
__u8 maskable;
__u8 swap;
- __u8 pad;
+ __u8 flags;
};
id contains the unique id for the adapter, isc the I/O interruption subclass
- to use, maskable whether this adapter may be masked (interrupts turned off)
- and swap whether the indicators need to be byte swapped.
+ to use, maskable whether this adapter may be masked (interrupts turned off),
+ swap whether the indicators need to be byte swapped, and flags contains
+ further characteristics of the adapter.
+ Currently defined values for 'flags' are:
+ - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to the AIS
+ (adapter-interruption-suppression) facility. This flag only has an effect
+ if the AIS capability is enabled.
+ Unknown flag values are ignored.
KVM_DEV_FLIC_ADAPTER_MODIFY
@@ -101,6 +109,33 @@ struct kvm_s390_io_adapter_req {
release a userspace page for the translated address specified in addr
from the list of mappings
+ KVM_DEV_FLIC_AISM
+ modify the adapter-interruption-suppression mode for a given isc if the
+ AIS capability is enabled. Takes a kvm_s390_ais_req describing:
+
+struct kvm_s390_ais_req {
+ __u8 isc;
+ __u16 mode;
+};
+
+ isc contains the target I/O interruption subclass, mode the target
+ adapter-interruption-suppression mode. The following modes are
+ currently supported:
+ - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
+ is always allowed;
+ - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
+ injection is only allowed once and the following adapter interrupts
+ will be suppressed until the mode is set again to ALL-Interruptions
+ or SINGLE-Interruption mode.
+
+ KVM_DEV_FLIC_AIRQ_INJECT
+ Inject adapter interrupts on a specified adapter.
+ attr->attr contains the unique id for the adapter, which allows for
+ adapter-specific checks and actions.
+ For adapters subject to AIS, handle the airq injection suppression for
+ an isc according to the adapter-interruption-suppression mode on condition
+ that the AIS capability is enabled.
+
Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
FLIC with an unknown group or attribute gives the error code EINVAL (instead of
ENXIO, as specified in the API documentation). It is not possible to conclude
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index feaaa634f154..a890529c63ed 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -28,6 +28,11 @@ S390:
property inside the device tree's /hypervisor node.
For more information refer to Documentation/virtual/kvm/ppc-pv.txt
+MIPS:
+ KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
+ number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and
+ the return value is placed in $2 (v0).
+
KVM Hypercalls Documentation
===========================
The template for each hypercall is:
diff --git a/Makefile b/Makefile
index 165cf9783a5d..b2faa9319372 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 11
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc3
NAME = Fearless Coyote
# *DOCUMENTATION*
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e22089fb44dc..a3f0b3d50089 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -209,6 +209,7 @@
#define HSR_EC_IABT_HYP (0x21)
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
+#define HSR_EC_MAX (0x3f)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index cc495d799c67..de67ce647501 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,8 +30,6 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
#define KVM_HALT_POLL_NS_DEFAULT 500000
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 6ebd3e6a1fd1..254a38cace2a 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,8 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c9a2103faeb9..e3c8105ada65 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -209,9 +209,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
r = 1;
break;
@@ -221,6 +218,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
case KVM_CAP_MSI_DEVID:
if (!kvm)
r = -EINVAL;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 4e40d1955e35..96af65a30d78 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+ hsr);
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec,
[HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
@@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x\n",
- (unsigned int)kvm_vcpu_get_hsr(vcpu));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 3c2cb5d5adfa..0bb0e9c6376c 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -411,3 +411,4 @@
394 common pkey_mprotect sys_pkey_mprotect
395 common pkey_alloc sys_pkey_alloc
396 common pkey_free sys_pkey_free
+397 common statx sys_statx
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a39029b5414e..3741859765cf 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009
If unsure, say Y.
+config QCOM_QDF2400_ERRATUM_0065
+ bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size"
+ default y
+ help
+ On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports
+ ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have
+ been indicated as 16 bytes (0xf), not 8 bytes (0x7).
+
+ If unsure, say Y.
+
endmenu
@@ -1063,6 +1073,10 @@ config SYSVIPC_COMPAT
def_bool y
depends on COMPAT && SYSVIPC
+config KEYS_COMPAT
+ def_bool y
+ depends on COMPAT && KEYS
+
endmenu
menu "Power management options"
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 05310ad8c5ab..f31c48d0cd68 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void)
static inline bool system_uses_ttbr0_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
- !cpus_have_cap(ARM64_HAS_PAN);
+ !cpus_have_const_cap(ARM64_HAS_PAN);
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f21fd3894370..522e4f60976e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,9 +30,7 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_USER_MEM_SLOTS 512
#define KVM_HALT_POLL_NS_DEFAULT 500000
#include <kvm/arm_vgic.h>
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index c2860358ae3e..aa5ab69c1312 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,8 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 75a0f8acef66..fd691087dc9a 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu)
}
/**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
* @arg: argument to pass to CPU suspend operations
*
* Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 2a07aae5b8a2..c5c45942fb6e 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
return 0;
}
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- return NOTIFY_DONE;
-}
-
static void __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p, *cur_kprobe;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 1bfe30dfbfe7..fa1b18e364fc 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+ hsr, esr_get_class_string(hsr));
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
@@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
u32 hsr = kvm_vcpu_get_hsr(vcpu);
u8 hsr_ec = ESR_ELx_EC(hsr);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
- hsr, esr_get_class_string(hsr));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index e8e7ba2bc11f..9e1d2b75eecd 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,14 +18,62 @@
#include <asm/kvm_hyp.h>
#include <asm/tlbflush.h>
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+ u64 val;
+
+ /*
+ * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+ * most TLB operations target EL2/EL0. In order to affect the
+ * guest TLBs (EL1/EL0), we need to change one of these two
+ * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+ * let's flip TGE before executing the TLB operation.
+ */
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ val = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ write_sysreg(val, hcr_el2);
+ isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+ __tlb_switch_to_guest_nvhe,
+ __tlb_switch_to_guest_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+ /*
+ * We're done with the TLB operation, let's restore the host's
+ * view of HCR_EL2.
+ */
+ write_sysreg(0, vttbr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+ __tlb_switch_to_host_nvhe,
+ __tlb_switch_to_host_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
- isb();
+ __tlb_switch_to_guest()(kvm);
/*
* We could do so much better if we had the VA as well.
@@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
dsb(ish);
isb();
- write_sysreg(0, vttbr_el2);
+ __tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
- isb();
+ __tlb_switch_to_guest()(kvm);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- write_sysreg(0, vttbr_el2);
+ __tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
/* Switch to requested VMID */
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
- isb();
+ __tlb_switch_to_guest()(kvm);
__tlbi(vmalle1);
dsb(nsh);
isb();
- write_sysreg(0, vttbr_el2);
+ __tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_flush_vm_context(void)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 55d1e9205543..687a358a3733 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -162,7 +162,7 @@ void __init kasan_init(void)
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
- pfn_to_nid(virt_to_pfn(_text)));
+ pfn_to_nid(virt_to_pfn(lm_alias(_text))));
/*
* vmemmap_populate() has populated the shadow region that covers the
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a008a9f03072..0a4adbc326e6 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1687,6 +1687,7 @@ config CPU_CAVIUM_OCTEON
select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select MIPS_L1_CACHE_SHIFT_7
+ select HAVE_KVM
help
The Cavium Octeon processor is a highly integrated chip containing
many ethernet hardware widgets for networking tasks. The processor
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index e961c8a7ea66..494d38274142 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -444,6 +444,10 @@
# define cpu_has_msa 0
#endif
+#ifndef cpu_has_ufr
+# define cpu_has_ufr (cpu_data[0].options & MIPS_CPU_UFR)
+#endif
+
#ifndef cpu_has_fre
# define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE)
#endif
@@ -528,6 +532,9 @@
#ifndef cpu_guest_has_htw
#define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW)
#endif
+#ifndef cpu_guest_has_mvh
+#define cpu_guest_has_mvh (cpu_data[0].guest.options & MIPS_CPU_MVH)
+#endif
#ifndef cpu_guest_has_msa
#define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA)
#endif
@@ -543,6 +550,9 @@
#ifndef cpu_guest_has_maar
#define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR)
#endif
+#ifndef cpu_guest_has_userlocal
+#define cpu_guest_has_userlocal (cpu_data[0].guest.options & MIPS_CPU_ULRI)
+#endif
/*
* Guest dynamic capabilities
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index edbe2734a1bf..be3b4c25f335 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -33,6 +33,7 @@ struct guest_info {
unsigned long ases_dyn;
unsigned long long options;
unsigned long long options_dyn;
+ int tlbsize;
u8 conf;
u8 kscratch_mask;
};
@@ -109,6 +110,7 @@ struct cpuinfo_mips {
struct guest_info guest;
unsigned int gtoffset_mask;
unsigned int guestid_mask;
+ unsigned int guestid_cache;
} __attribute__((aligned(SMP_CACHE_BYTES)));
extern struct cpuinfo_mips cpu_data[];
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 9a8372484edc..98f59307e6a3 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -415,6 +415,7 @@ enum cpu_type_enum {
#define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */
#define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */
#define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */
+#define MIPS_CPU_UFR MBIT_ULL(53) /* CPU supports User mode FR switching */
/*
* CPU ASE encodings
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 05e785fc061d..2998479fd4e8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -10,6 +10,7 @@
#ifndef __MIPS_KVM_HOST_H__
#define __MIPS_KVM_HOST_H__
+#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
@@ -33,12 +34,23 @@
#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
+#define KVM_REG_MIPS_CP0_CONTEXTCONFIG MIPS_CP0_32(4, 1)
#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
+#define KVM_REG_MIPS_CP0_XCONTEXTCONFIG MIPS_CP0_64(4, 3)
#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
+#define KVM_REG_MIPS_CP0_SEGCTL0 MIPS_CP0_64(5, 2)
+#define KVM_REG_MIPS_CP0_SEGCTL1 MIPS_CP0_64(5, 3)
+#define KVM_REG_MIPS_CP0_SEGCTL2 MIPS_CP0_64(5, 4)
+#define KVM_REG_MIPS_CP0_PWBASE MIPS_CP0_64(5, 5)
+#define KVM_REG_MIPS_CP0_PWFIELD MIPS_CP0_64(5, 6)
+#define KVM_REG_MIPS_CP0_PWSIZE MIPS_CP0_64(5, 7)
#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
+#define KVM_REG_MIPS_CP0_PWCTL MIPS_CP0_32(6, 6)
#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
+#define KVM_REG_MIPS_CP0_BADINSTR MIPS_CP0_32(8, 1)
+#define KVM_REG_MIPS_CP0_BADINSTRP MIPS_CP0_32(8, 2)
#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
@@ -55,6 +67,7 @@
#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4)
#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5)
#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
+#define KVM_REG_MIPS_CP0_MAARI MIPS_CP0_64(17, 2)
#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
#define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2)
@@ -70,9 +83,13 @@
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 0
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HALT_POLL_NS_DEFAULT 500000
+#ifdef CONFIG_KVM_MIPS_VZ
+extern unsigned long GUESTID_MASK;
+extern unsigned long GUESTID_FIRST_VERSION;
+extern unsigned long GUESTID_VERSION_MASK;
+#endif
/*
@@ -145,6 +162,16 @@ struct kvm_vcpu_stat {
u64 fpe_exits;
u64 msa_disabled_exits;
u64 flush_dcache_exits;
+#ifdef CONFIG_KVM_MIPS_VZ
+ u64 vz_gpsi_exits;
+ u64 vz_gsfc_exits;
+ u64 vz_hc_exits;
+ u64 vz_grr_exits;
+ u64 vz_gva_exits;
+ u64 vz_ghfc_exits;
+ u64 vz_gpa_exits;
+ u64 vz_resvd_exits;
+#endif
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
@@ -157,6 +184,8 @@ struct kvm_arch_memory_slot {
struct kvm_arch {
/* Guest physical mm */
struct mm_struct gpa_mm;
+ /* Mask of CPUs needing GPA ASID flush */
+ cpumask_t asid_flush_mask;
};
#define N_MIPS_COPROC_REGS 32
@@ -214,6 +243,11 @@ struct mips_coproc {
#define MIPS_CP0_CONFIG4_SEL 4
#define MIPS_CP0_CONFIG5_SEL 5
+#define MIPS_CP0_GUESTCTL2 10
+#define MIPS_CP0_GUESTCTL2_SEL 5
+#define MIPS_CP0_GTOFFSET 12
+#define MIPS_CP0_GTOFFSET_SEL 7
+
/* Resume Flags */
#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
@@ -229,6 +263,7 @@ enum emulation_result {
EMULATE_WAIT, /* WAIT instruction */
EMULATE_PRIV_FAIL,
EMULATE_EXCEPT, /* A guest exception has been generated */
+ EMULATE_HYPERCALL, /* HYPCALL instruction */
};
#define mips3_paddr_to_tlbpfn(x) \
@@ -276,13 +311,18 @@ struct kvm_mmu_memory_cache {
struct kvm_vcpu_arch {
void *guest_ebase;
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+ /* Host registers preserved across guest mode execution */
unsigned long host_stack;
unsigned long host_gp;
+ unsigned long host_pgd;
+ unsigned long host_entryhi;
/* Host CP0 registers used when handling exits from guest */
unsigned long host_cp0_badvaddr;
unsigned long host_cp0_epc;
u32 host_cp0_cause;
+ u32 host_cp0_guestctl0;
u32 host_cp0_badinstr;
u32 host_cp0_badinstrp;
@@ -340,7 +380,23 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
+#ifdef CONFIG_KVM_MIPS_VZ
+ /* vcpu's vzguestid is different on each host cpu in an smp system */
+ u32 vzguestid[NR_CPUS];
+
+ /* wired guest TLB entries */
+ struct kvm_mips_tlb *wired_tlb;
+ unsigned int wired_tlb_limit;
+ unsigned int wired_tlb_used;
+
+ /* emulated guest MAAR registers */
+ unsigned long maar[6];
+#endif
+
+ /* Last CPU the VCPU state was loaded on */
int last_sched_cpu;
+ /* Last CPU the VCPU actually executed guest code on */
+ int last_exec_cpu;
/* WAIT executed */
int wait;
@@ -349,78 +405,6 @@ struct kvm_vcpu_arch {
u8 msa_enabled;
};
-
-#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
-#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
-#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
-#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val))
-#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
-#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val))
-#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
-#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
-#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
-#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val))
-#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
-#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
-#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
-#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
-#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0])
-#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val))
-#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0])
-#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
-#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0])
-#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val))
-#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0])
-#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
-#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0])
-#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
-#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0])
-#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val))
-#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1])
-#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val))
-#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0])
-#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
-#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0])
-#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
-#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0])
-#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val))
-#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1])
-#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val))
-#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0])
-#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
-#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
-#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
-#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4])
-#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5])
-#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
-#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
-#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
-#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
-#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
-#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val))
-#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val))
-#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
-#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
-#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
-#define kvm_read_c0_guest_kscratch1(cop0) (cop0->reg[MIPS_CP0_DESAVE][2])
-#define kvm_read_c0_guest_kscratch2(cop0) (cop0->reg[MIPS_CP0_DESAVE][3])
-#define kvm_read_c0_guest_kscratch3(cop0) (cop0->reg[MIPS_CP0_DESAVE][4])
-#define kvm_read_c0_guest_kscratch4(cop0) (cop0->reg[MIPS_CP0_DESAVE][5])
-#define kvm_read_c0_guest_kscratch5(cop0) (cop0->reg[MIPS_CP0_DESAVE][6])
-#define kvm_read_c0_guest_kscratch6(cop0) (cop0->reg[MIPS_CP0_DESAVE][7])
-#define kvm_write_c0_guest_kscratch1(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][2] = (val))
-#define kvm_write_c0_guest_kscratch2(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][3] = (val))
-#define kvm_write_c0_guest_kscratch3(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][4] = (val))
-#define kvm_write_c0_guest_kscratch4(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][5] = (val))
-#define kvm_write_c0_guest_kscratch5(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][6] = (val))
-#define kvm_write_c0_guest_kscratch6(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][7] = (val))
-
-/*
- * Some of the guest registers may be modified asynchronously (e.g. from a
- * hrtimer callback in hard irq context) and therefore need stronger atomicity
- * guarantees than other registers.
- */
-
static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
unsigned long val)
{
@@ -471,26 +455,286 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
} while (unlikely(!temp));
}
-#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
-#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
+/* Guest register types, used in accessor build below */
+#define __KVMT32 u32
+#define __KVMTl unsigned long
-/* Cause can be modified asynchronously from hardirq hrtimer callback */
-#define kvm_set_c0_guest_cause(cop0, val) \
- _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
-#define kvm_clear_c0_guest_cause(cop0, val) \
- _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
-#define kvm_change_c0_guest_cause(cop0, change, val) \
- _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \
- change, val)
-
-#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
-#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
-#define kvm_change_c0_guest_ebase(cop0, change, val) \
+/*
+ * __BUILD_KVM_$ops_SAVED(): kvm_$op_sw_gc0_$reg()
+ * These operate on the saved guest C0 state in RAM.
+ */
+
+/* Generate saved context simple accessors */
+#define __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
+static inline __KVMT##type kvm_read_sw_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ return cop0->reg[(_reg)][(sel)]; \
+} \
+static inline void kvm_write_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ cop0->reg[(_reg)][(sel)] = val; \
+}
+
+/* Generate saved context bitwise modifiers */
+#define __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
+static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ cop0->reg[(_reg)][(sel)] |= val; \
+} \
+static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ cop0->reg[(_reg)][(sel)] &= ~val; \
+} \
+static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ unsigned long _mask = mask; \
+ cop0->reg[(_reg)][(sel)] &= ~_mask; \
+ cop0->reg[(_reg)][(sel)] |= val & _mask; \
+}
+
+/* Generate saved context atomic bitwise modifiers */
+#define __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \
+static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ _kvm_atomic_set_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \
+} \
+static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ _kvm_atomic_clear_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \
+} \
+static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ _kvm_atomic_change_c0_guest_reg(&cop0->reg[(_reg)][(sel)], mask, \
+ val); \
+}
+
+/*
+ * __BUILD_KVM_$ops_VZ(): kvm_$op_vz_gc0_$reg()
+ * These operate on the VZ guest C0 context in hardware.
+ */
+
+/* Generate VZ guest context simple accessors */
+#define __BUILD_KVM_RW_VZ(name, type, _reg, sel) \
+static inline __KVMT##type kvm_read_vz_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ return read_gc0_##name(); \
+} \
+static inline void kvm_write_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ write_gc0_##name(val); \
+}
+
+/* Generate VZ guest context bitwise modifiers */
+#define __BUILD_KVM_SET_VZ(name, type, _reg, sel) \
+static inline void kvm_set_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ set_gc0_##name(val); \
+} \
+static inline void kvm_clear_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ clear_gc0_##name(val); \
+} \
+static inline void kvm_change_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ change_gc0_##name(mask, val); \
+}
+
+/* Generate VZ guest context save/restore to/from saved context */
+#define __BUILD_KVM_SAVE_VZ(name, _reg, sel) \
+static inline void kvm_restore_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ write_gc0_##name(cop0->reg[(_reg)][(sel)]); \
+} \
+static inline void kvm_save_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ cop0->reg[(_reg)][(sel)] = read_gc0_##name(); \
+}
+
+/*
+ * __BUILD_KVM_$ops_WRAP(): kvm_$op_$name1() -> kvm_$op_$name2()
+ * These wrap a set of operations to provide them with a different name.
+ */
+
+/* Generate simple accessor wrapper */
+#define __BUILD_KVM_RW_WRAP(name1, name2, type) \
+static inline __KVMT##type kvm_read_##name1(struct mips_coproc *cop0) \
+{ \
+ return kvm_read_##name2(cop0); \
+} \
+static inline void kvm_write_##name1(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ kvm_write_##name2(cop0, val); \
+}
+
+/* Generate bitwise modifier wrapper */
+#define __BUILD_KVM_SET_WRAP(name1, name2, type) \
+static inline void kvm_set_##name1(struct mips_coproc *cop0, \
+ __KVMT##type val) \
{ \
- kvm_clear_c0_guest_ebase(cop0, change); \
- kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
+ kvm_set_##name2(cop0, val); \
+} \
+static inline void kvm_clear_##name1(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ kvm_clear_##name2(cop0, val); \
+} \
+static inline void kvm_change_##name1(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ kvm_change_##name2(cop0, mask, val); \
}
+/*
+ * __BUILD_KVM_$ops_SW(): kvm_$op_c0_guest_$reg() -> kvm_$op_sw_gc0_$reg()
+ * These generate accessors operating on the saved context in RAM, and wrap them
+ * with the common guest C0 accessors (for use by common emulation code).
+ */
+
+#define __BUILD_KVM_RW_SW(name, type, _reg, sel) \
+ __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_RW_WRAP(c0_guest_##name, sw_gc0_##name, type)
+
+#define __BUILD_KVM_SET_SW(name, type, _reg, sel) \
+ __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type)
+
+#define __BUILD_KVM_ATOMIC_SW(name, type, _reg, sel) \
+ __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type)
+
+#ifndef CONFIG_KVM_MIPS_VZ
+
+/*
+ * T&E (trap & emulate software based virtualisation)
+ * We generate the common accessors operating exclusively on the saved context
+ * in RAM.
+ */
+
+#define __BUILD_KVM_RW_HW __BUILD_KVM_RW_SW
+#define __BUILD_KVM_SET_HW __BUILD_KVM_SET_SW
+#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_ATOMIC_SW
+
+#else
+
+/*
+ * VZ (hardware assisted virtualisation)
+ * These macros use the active guest state in VZ mode (hardware registers).
+ */
+
+/*
+ * __BUILD_KVM_$ops_HW(): kvm_$op_c0_guest_$reg() -> kvm_$op_vz_gc0_$reg()
+ * These generate accessors operating on the VZ guest context in hardware, and
+ * wrap them with the common guest C0 accessors (for use by common emulation
+ * code).
+ *
+ * Accessors operating on the saved context in RAM are also generated to allow
+ * convenient explicit saving and restoring of the state.
+ */
+
+#define __BUILD_KVM_RW_HW(name, type, _reg, sel) \
+ __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_RW_VZ(name, type, _reg, sel) \
+ __BUILD_KVM_RW_WRAP(c0_guest_##name, vz_gc0_##name, type) \
+ __BUILD_KVM_SAVE_VZ(name, _reg, sel)
+
+#define __BUILD_KVM_SET_HW(name, type, _reg, sel) \
+ __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_SET_VZ(name, type, _reg, sel) \
+ __BUILD_KVM_SET_WRAP(c0_guest_##name, vz_gc0_##name, type)
+
+/*
+ * We can't do atomic modifications of COP0 state if hardware can modify it.
+ * Races must be handled explicitly.
+ */
+#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_SET_HW
+
+#endif
+
+/*
+ * Define accessors for CP0 registers that are accessible to the guest. These
+ * are primarily used by common emulation code, which may need to access the
+ * registers differently depending on the implementation.
+ *
+ * fns_hw/sw name type reg num select
+ */
+__BUILD_KVM_RW_HW(index, 32, MIPS_CP0_TLB_INDEX, 0)
+__BUILD_KVM_RW_HW(entrylo0, l, MIPS_CP0_TLB_LO0, 0)
+__BUILD_KVM_RW_HW(entrylo1, l, MIPS_CP0_TLB_LO1, 0)
+__BUILD_KVM_RW_HW(context, l, MIPS_CP0_TLB_CONTEXT, 0)
+__BUILD_KVM_RW_HW(contextconfig, 32, MIPS_CP0_TLB_CONTEXT, 1)
+__BUILD_KVM_RW_HW(userlocal, l, MIPS_CP0_TLB_CONTEXT, 2)
+__BUILD_KVM_RW_HW(xcontextconfig, l, MIPS_CP0_TLB_CONTEXT, 3)
+__BUILD_KVM_RW_HW(pagemask, l, MIPS_CP0_TLB_PG_MASK, 0)
+__BUILD_KVM_RW_HW(pagegrain, 32, MIPS_CP0_TLB_PG_MASK, 1)
+__BUILD_KVM_RW_HW(segctl0, l, MIPS_CP0_TLB_PG_MASK, 2)
+__BUILD_KVM_RW_HW(segctl1, l, MIPS_CP0_TLB_PG_MASK, 3)
+__BUILD_KVM_RW_HW(segctl2, l, MIPS_CP0_TLB_PG_MASK, 4)
+__BUILD_KVM_RW_HW(pwbase, l, MIPS_CP0_TLB_PG_MASK, 5)
+__BUILD_KVM_RW_HW(pwfield, l, MIPS_CP0_TLB_PG_MASK, 6)
+__BUILD_KVM_RW_HW(pwsize, l, MIPS_CP0_TLB_PG_MASK, 7)
+__BUILD_KVM_RW_HW(wired, 32, MIPS_CP0_TLB_WIRED, 0)
+__BUILD_KVM_RW_HW(pwctl, 32, MIPS_CP0_TLB_WIRED, 6)
+__BUILD_KVM_RW_HW(hwrena, 32, MIPS_CP0_HWRENA, 0)
+__BUILD_KVM_RW_HW(badvaddr, l, MIPS_CP0_BAD_VADDR, 0)
+__BUILD_KVM_RW_HW(badinstr, 32, MIPS_CP0_BAD_VADDR, 1)
+__BUILD_KVM_RW_HW(badinstrp, 32, MIPS_CP0_BAD_VADDR, 2)
+__BUILD_KVM_RW_SW(count, 32, MIPS_CP0_COUNT, 0)
+__BUILD_KVM_RW_HW(entryhi, l, MIPS_CP0_TLB_HI, 0)
+__BUILD_KVM_RW_HW(compare, 32, MIPS_CP0_COMPARE, 0)
+__BUILD_KVM_RW_HW(status, 32, MIPS_CP0_STATUS, 0)
+__BUILD_KVM_RW_HW(intctl, 32, MIPS_CP0_STATUS, 1)
+__BUILD_KVM_RW_HW(cause, 32, MIPS_CP0_CAUSE, 0)
+__BUILD_KVM_RW_HW(epc, l, MIPS_CP0_EXC_PC, 0)
+__BUILD_KVM_RW_SW(prid, 32, MIPS_CP0_PRID, 0)
+__BUILD_KVM_RW_HW(ebase, l, MIPS_CP0_PRID, 1)
+__BUILD_KVM_RW_HW(config, 32, MIPS_CP0_CONFIG, 0)
+__BUILD_KVM_RW_HW(config1, 32, MIPS_CP0_CONFIG, 1)
+__BUILD_KVM_RW_HW(config2, 32, MIPS_CP0_CONFIG, 2)
+__BUILD_KVM_RW_HW(config3, 32, MIPS_CP0_CONFIG, 3)
+__BUILD_KVM_RW_HW(config4, 32, MIPS_CP0_CONFIG, 4)
+__BUILD_KVM_RW_HW(config5, 32, MIPS_CP0_CONFIG, 5)
+__BUILD_KVM_RW_HW(config6, 32, MIPS_CP0_CONFIG, 6)
+__BUILD_KVM_RW_HW(config7, 32, MIPS_CP0_CONFIG, 7)
+__BUILD_KVM_RW_SW(maari, l, MIPS_CP0_LLADDR, 2)
+__BUILD_KVM_RW_HW(xcontext, l, MIPS_CP0_TLB_XCONTEXT, 0)
+__BUILD_KVM_RW_HW(errorepc, l, MIPS_CP0_ERROR_PC, 0)
+__BUILD_KVM_RW_HW(kscratch1, l, MIPS_CP0_DESAVE, 2)
+__BUILD_KVM_RW_HW(kscratch2, l, MIPS_CP0_DESAVE, 3)
+__BUILD_KVM_RW_HW(kscratch3, l, MIPS_CP0_DESAVE, 4)
+__BUILD_KVM_RW_HW(kscratch4, l, MIPS_CP0_DESAVE, 5)
+__BUILD_KVM_RW_HW(kscratch5, l, MIPS_CP0_DESAVE, 6)
+__BUILD_KVM_RW_HW(kscratch6, l, MIPS_CP0_DESAVE, 7)
+
+/* Bitwise operations (on HW state) */
+__BUILD_KVM_SET_HW(status, 32, MIPS_CP0_STATUS, 0)
+/* Cause can be modified asynchronously from hardirq hrtimer callback */
+__BUILD_KVM_ATOMIC_HW(cause, 32, MIPS_CP0_CAUSE, 0)
+__BUILD_KVM_SET_HW(ebase, l, MIPS_CP0_PRID, 1)
+
+/* Bitwise operations (on saved state) */
+__BUILD_KVM_SET_SAVED(config, 32, MIPS_CP0_CONFIG, 0)
+__BUILD_KVM_SET_SAVED(config1, 32, MIPS_CP0_CONFIG, 1)
+__BUILD_KVM_SET_SAVED(config2, 32, MIPS_CP0_CONFIG, 2)
+__BUILD_KVM_SET_SAVED(config3, 32, MIPS_CP0_CONFIG, 3)
+__BUILD_KVM_SET_SAVED(config4, 32, MIPS_CP0_CONFIG, 4)
+__BUILD_KVM_SET_SAVED(config5, 32, MIPS_CP0_CONFIG, 5)
+
/* Helpers */
static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
@@ -531,6 +775,10 @@ struct kvm_mips_callbacks {
int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
int (*handle_fpe)(struct kvm_vcpu *vcpu);
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
+ int (*handle_guest_exit)(struct kvm_vcpu *vcpu);
+ int (*hardware_enable)(void);
+ void (*hardware_disable)(void);
+ int (*check_extension)(struct kvm *kvm, long ext);
int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -599,6 +847,10 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu);
u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM_MIPS_VZ
+int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
+ struct kvm_vcpu *vcpu, bool write_fault);
+#endif
extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr,
struct kvm_vcpu *vcpu,
bool write_fault);
@@ -625,6 +877,18 @@ extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi,
extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
unsigned long entryhi);
+#ifdef CONFIG_KVM_MIPS_VZ
+int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
+int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa);
+void kvm_vz_local_flush_roottlb_all_guests(void);
+void kvm_vz_local_flush_guesttlb_all(void);
+void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count);
+void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count);
+#endif
+
void kvm_mips_suspend_mm(int cpu);
void kvm_mips_resume_mm(int cpu);
@@ -795,7 +1059,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
u32 kvm_mips_read_count(struct kvm_vcpu *vcpu);
void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count);
void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack);
-void kvm_mips_init_count(struct kvm_vcpu *vcpu);
+void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz);
int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz);
@@ -803,6 +1067,20 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
+/* fairly internal functions requiring some care to use */
+int kvm_mips_count_disabled(struct kvm_vcpu *vcpu);
+ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count);
+int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
+ u32 count, int min_drift);
+
+#ifdef CONFIG_KVM_MIPS_VZ
+void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu);
+void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu);
+#else
+static inline void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {}
+#endif
+
enum emulation_result kvm_mips_check_privilege(u32 cause,
u32 *opc,
struct kvm_run *run,
@@ -827,11 +1105,20 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
+/* COP0 */
+enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu);
+
unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+/* Hypercalls (hypcall.c) */
+
+enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu,
+ union mips_instruction inst);
+int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu);
+
/* Dynamic binary translation */
extern int kvm_mips_trans_cache_index(union mips_instruction inst,
u32 *opc, struct kvm_vcpu *vcpu);
@@ -846,7 +1133,6 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc,
extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
-static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h
index 21d9607c80d7..e10f78befbd9 100644
--- a/arch/mips/include/asm/maar.h
+++ b/arch/mips/include/asm/maar.h
@@ -36,7 +36,7 @@ unsigned platform_maar_init(unsigned num_pairs);
* @upper: The highest address that the MAAR pair will affect. Must be
* aligned to one byte before a 2^16 byte boundary.
* @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
- * MIPS_MAAR_V attribute will automatically be set.
+ * MIPS_MAAR_VL attribute will automatically be set.
*
* Program the pair of MAAR registers specified by idx to apply the attributes
* specified by attrs to the range of addresses from lower to higher.
@@ -49,10 +49,10 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower,
BUG_ON(((upper & 0xffff) != 0xffff)
|| ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4)));
- /* Automatically set MIPS_MAAR_V */
- attrs |= MIPS_MAAR_V;
+ /* Automatically set MIPS_MAAR_VL */
+ attrs |= MIPS_MAAR_VL;
- /* Write the upper address & attributes (only MIPS_MAAR_V matters) */
+ /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */
write_c0_maari(idx << 1);
back_to_back_c0_hazard();
write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs);
@@ -81,7 +81,7 @@ extern void maar_init(void);
* @upper: The highest address that the MAAR pair will affect. Must be
* aligned to one byte before a 2^16 byte boundary.
* @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
- * MIPS_MAAR_V attribute will automatically be set.
+ * MIPS_MAAR_VL attribute will automatically be set.
*
* Describes the configuration of a pair of Memory Accessibility Attribute
* Registers - applying attributes from attrs to the range of physical
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index f8d1d2f1d80d..6875b69f59f7 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -34,8 +34,10 @@
*/
#ifdef __ASSEMBLY__
#define _ULCAST_
+#define _U64CAST_
#else
#define _ULCAST_ (unsigned long)
+#define _U64CAST_ (u64)
#endif
/*
@@ -217,8 +219,10 @@
/*
* Wired register bits
*/
-#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << 16)
-#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << 0)
+#define MIPSR6_WIRED_LIMIT_SHIFT 16
+#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << MIPSR6_WIRED_LIMIT_SHIFT)
+#define MIPSR6_WIRED_WIRED_SHIFT 0
+#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << MIPSR6_WIRED_WIRED_SHIFT)
/*
* Values used for computation of new tlb entries
@@ -645,6 +649,7 @@
#define MIPS_CONF5_LLB (_ULCAST_(1) << 4)
#define MIPS_CONF5_MVH (_ULCAST_(1) << 5)
#define MIPS_CONF5_VP (_ULCAST_(1) << 7)
+#define MIPS_CONF5_SBRI (_ULCAST_(1) << 6)
#define MIPS_CONF5_FRE (_ULCAST_(1) << 8)
#define MIPS_CONF5_UFE (_ULCAST_(1) << 9)
#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27)
@@ -719,10 +724,14 @@
#define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13)
/* MAAR bit definitions */
+#define MIPS_MAAR_VH (_U64CAST_(1) << 63)
#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12)
#define MIPS_MAAR_ADDR_SHIFT 12
#define MIPS_MAAR_S (_ULCAST_(1) << 1)
-#define MIPS_MAAR_V (_ULCAST_(1) << 0)
+#define MIPS_MAAR_VL (_ULCAST_(1) << 0)
+
+/* MAARI bit definitions */
+#define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0)
/* EBase bit definitions */
#define MIPS_EBASE_CPUNUM_SHIFT 0
@@ -736,6 +745,10 @@
#define MIPS_CMGCRB_BASE 11
#define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1))
+/* LLAddr bit definitions */
+#define MIPS_LLADDR_LLB_SHIFT 0
+#define MIPS_LLADDR_LLB (_ULCAST_(1) << MIPS_LLADDR_LLB_SHIFT)
+
/*
* Bits in the MIPS32 Memory Segmentation registers.
*/
@@ -961,6 +974,22 @@
/* Flush FTLB */
#define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13)
+/* CvmCtl register field definitions */
+#define CVMCTL_IPPCI_SHIFT 7
+#define CVMCTL_IPPCI (_U64CAST_(0x7) << CVMCTL_IPPCI_SHIFT)
+#define CVMCTL_IPTI_SHIFT 4
+#define CVMCTL_IPTI (_U64CAST_(0x7) << CVMCTL_IPTI_SHIFT)
+
+/* CvmMemCtl2 register field definitions */
+#define CVMMEMCTL2_INHIBITTS (_U64CAST_(1) << 17)
+
+/* CvmVMConfig register field definitions */
+#define CVMVMCONF_DGHT (_U64CAST_(1) << 60)
+#define CVMVMCONF_MMUSIZEM1_S 12
+#define CVMVMCONF_MMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_MMUSIZEM1_S)
+#define CVMVMCONF_RMMUSIZEM1_S 0
+#define CVMVMCONF_RMMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_RMMUSIZEM1_S)
+
/*
* Coprocessor 1 (FPU) register names
*/
@@ -1720,6 +1749,13 @@ do { \
#define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7)
#define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val)
+
+#define read_c0_cvmmemctl2() __read_64bit_c0_register($16, 6)
+#define write_c0_cvmmemctl2(val) __write_64bit_c0_register($16, 6, val)
+
+#define read_c0_cvmvmconfig() __read_64bit_c0_register($16, 7)
+#define write_c0_cvmvmconfig(val) __write_64bit_c0_register($16, 7, val)
+
/*
* The cacheerr registers are not standardized. On OCTEON, they are
* 64 bits wide.
@@ -1989,6 +2025,8 @@ do { \
#define read_gc0_epc() __read_ulong_gc0_register(14, 0)
#define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val)
+#define read_gc0_prid() __read_32bit_gc0_register(15, 0)
+
#define read_gc0_ebase() __read_32bit_gc0_register(15, 1)
#define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val)
@@ -2012,6 +2050,9 @@ do { \
#define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val)
#define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val)
+#define read_gc0_lladdr() __read_ulong_gc0_register(17, 0)
+#define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val)
+
#define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0)
#define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1)
#define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2)
@@ -2090,6 +2131,19 @@ do { \
#define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val)
#define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val)
+/* Cavium OCTEON (cnMIPS) */
+#define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6)
+#define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val)
+
+#define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7)
+#define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val)
+
+#define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7)
+#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val)
+
+#define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6)
+#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val)
+
/*
* Macros to access the floating point coprocessor control registers
*/
@@ -2696,9 +2750,11 @@ __BUILD_SET_C0(brcm_mode)
*/
#define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name)
+__BUILD_SET_GC0(wired)
__BUILD_SET_GC0(status)
__BUILD_SET_GC0(cause)
__BUILD_SET_GC0(ebase)
+__BUILD_SET_GC0(config1)
/*
* Return low 10 bits of ebase.
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index dd179fd8acda..939734de4359 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -21,9 +21,11 @@
*/
#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-#define UNIQUE_ENTRYHI(idx) \
- ((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) | \
+#define _UNIQUE_ENTRYHI(base, idx) \
+ (((base) + ((idx) << (PAGE_SHIFT + 1))) | \
(cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
+#define UNIQUE_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG0, idx)
+#define UNIQUE_GUEST_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG1, idx)
static inline unsigned int num_wired_entries(void)
{
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 77429d1622b3..b5e46ae872d3 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -179,7 +179,7 @@ enum cop0_coi_func {
tlbr_op = 0x01, tlbwi_op = 0x02,
tlbwr_op = 0x06, tlbp_op = 0x08,
rfe_op = 0x10, eret_op = 0x18,
- wait_op = 0x20,
+ wait_op = 0x20, hypcall_op = 0x28
};
/*
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index a8a0199bf760..0318c6b442ab 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -21,6 +21,8 @@
#define __KVM_HAVE_READONLY_MEM
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
/*
* for KVM_GET_REGS and KVM_SET_REGS
*
@@ -54,9 +56,14 @@ struct kvm_fpu {
* Register set = 0: GP registers from kvm_regs (see definitions below).
*
* Register set = 1: CP0 registers.
- * bits[15..8] - Must be zero.
- * bits[7..3] - Register 'rd' index.
- * bits[2..0] - Register 'sel' index.
+ * bits[15..8] - COP0 register set.
+ *
+ * COP0 register set = 0: Main CP0 registers.
+ * bits[7..3] - Register 'rd' index.
+ * bits[2..0] - Register 'sel' index.
+ *
+ * COP0 register set = 1: MAARs.
+ * bits[7..0] - MAAR index.
*
* Register set = 2: KVM specific registers (see definitions below).
*
@@ -115,6 +122,15 @@ struct kvm_fpu {
/*
+ * KVM_REG_MIPS_CP0 - Coprocessor 0 registers.
+ */
+
+#define KVM_REG_MIPS_MAAR (KVM_REG_MIPS_CP0 | (1 << 8))
+#define KVM_REG_MIPS_CP0_MAAR(n) (KVM_REG_MIPS_MAAR | \
+ KVM_REG_SIZE_U64 | (n))
+
+
+/*
* KVM_REG_MIPS_KVM - KVM specific control registers.
*/
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 07718bb5fc9d..c72a4cda389c 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -289,6 +289,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
if (c->fpu_id & MIPS_FPIR_3D)
c->ases |= MIPS_ASE_MIPS3D;
+ if (c->fpu_id & MIPS_FPIR_UFRP)
+ c->options |= MIPS_CPU_UFR;
if (c->fpu_id & MIPS_FPIR_FREP)
c->options |= MIPS_CPU_FRE;
}
@@ -1003,7 +1005,8 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
unsigned int config3, config3_dyn;
probe_gc0_config_dyn(config3, config3, config3_dyn,
- MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_CTXTC);
+ MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_ULRI |
+ MIPS_CONF3_CTXTC);
if (config3 & MIPS_CONF3_CTXTC)
c->guest.options |= MIPS_CPU_CTXTC;
@@ -1013,6 +1016,9 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
if (config3 & MIPS_CONF3_PW)
c->guest.options |= MIPS_CPU_HTW;
+ if (config3 & MIPS_CONF3_ULRI)
+ c->guest.options |= MIPS_CPU_ULRI;
+
if (config3 & MIPS_CONF3_SC)
c->guest.options |= MIPS_CPU_SEGMENTS;
@@ -1051,7 +1057,7 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
unsigned int config5, config5_dyn;
probe_gc0_config_dyn(config5, config5, config5_dyn,
- MIPS_CONF_M | MIPS_CONF5_MRP);
+ MIPS_CONF_M | MIPS_CONF5_MVH | MIPS_CONF5_MRP);
if (config5 & MIPS_CONF5_MRP)
c->guest.options |= MIPS_CPU_MAAR;
@@ -1061,6 +1067,9 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
if (config5 & MIPS_CONF5_LLB)
c->guest.options |= MIPS_CPU_RW_LLB;
+ if (config5 & MIPS_CONF5_MVH)
+ c->guest.options |= MIPS_CPU_MVH;
+
if (config5 & MIPS_CONF_M)
c->guest.conf |= BIT(6);
return config5 & MIPS_CONF_M;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index a7f81261c781..c036157fb891 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq);
*/
unsigned int mips_hpt_frequency;
+EXPORT_SYMBOL_GPL(mips_hpt_frequency);
/*
* This function exists in order to cause an error due to a duplicate
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 65067327db12..50a722dfb236 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -26,11 +26,34 @@ config KVM
select SRCU
---help---
Support for hosting Guest kernels.
- Currently supported on MIPS32 processors.
+
+choice
+ prompt "Virtualization mode"
+ depends on KVM
+ default KVM_MIPS_TE
+
+config KVM_MIPS_TE
+ bool "Trap & Emulate"
+ ---help---
+ Use trap and emulate to virtualize 32-bit guests in user mode. This
+ does not require any special hardware Virtualization support beyond
+ standard MIPS32/64 r2 or later, but it does require the guest kernel
+ to be configured with CONFIG_KVM_GUEST=y so that it resides in the
+ user address segment.
+
+config KVM_MIPS_VZ
+ bool "MIPS Virtualization (VZ) ASE"
+ ---help---
+ Use the MIPS Virtualization (VZ) ASE to virtualize guests. This
+ supports running unmodified guest kernels (with CONFIG_KVM_GUEST=n),
+ but requires hardware support.
+
+endchoice
config KVM_MIPS_DYN_TRANS
bool "KVM/MIPS: Dynamic binary translation to reduce traps"
- depends on KVM
+ depends on KVM_MIPS_TE
+ default y
---help---
When running in Trap & Emulate mode patch privileged
instructions to reduce the number of traps.
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 847429de780d..45d90f5d5177 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -9,8 +9,15 @@ common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
interrupt.o stats.o commpage.o \
- dyntrans.o trap_emul.o fpu.o
+ fpu.o
+kvm-objs += hypcall.o
kvm-objs += mmu.o
+ifdef CONFIG_KVM_MIPS_VZ
+kvm-objs += vz.o
+else
+kvm-objs += dyntrans.o
+kvm-objs += trap_emul.o
+endif
obj-$(CONFIG_KVM) += kvm.o
obj-y += callback.o tlb.o
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index d40cfaad4529..34e78a3ee9d7 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -308,7 +308,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
* CP0_Cause.DC bit or the count_ctl.DC bit.
* 0 otherwise (in which case CP0_Count timer is running).
*/
-static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
+int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@@ -467,7 +467,7 @@ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu)
*
* Returns: The ktime at the point of freeze.
*/
-static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
+ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
{
ktime_t now;
@@ -517,6 +517,82 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
}
/**
+ * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry.
+ * @vcpu: Virtual CPU.
+ * @before: Time before Count was saved, lower bound of drift calculation.
+ * @count: CP0_Count at point of restore.
+ * @min_drift: Minimum amount of drift permitted before correction.
+ * Must be <= 0.
+ *
+ * Restores the timer from a particular @count, accounting for drift. This can
+ * be used in conjunction with kvm_mips_freeze_hrtimer() when a hardware timer
+ * is to be used for a period of time, but the exact ktime corresponding to the
+ * final Count that must be restored is not known.
+ *
+ * It is guaranteed that a timer interrupt immediately after restore will be
+ * handled, but not if CP0_Compare is exactly at @count. That case should
+ * already be handled when the hardware timer state is saved.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not
+ * stopped).
+ *
+ * Returns: Amount of correction to count_bias due to drift.
+ */
+int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
+ u32 count, int min_drift)
+{
+ ktime_t now, count_time;
+ u32 now_count, before_count;
+ u64 delta;
+ int drift, ret = 0;
+
+ /* Calculate expected count at before */
+ before_count = vcpu->arch.count_bias +
+ kvm_mips_ktime_to_count(vcpu, before);
+
+ /*
+ * Detect significantly negative drift, where count is lower than
+ * expected. Some negative drift is expected when hardware counter is
+ * set after kvm_mips_freeze_hrtimer(), and it is harmless to allow the
+ * time to jump forwards a little, within reason. If the drift is too
+ * significant, adjust the bias to avoid a big Guest.CP0_Count jump.
+ */
+ drift = count - before_count;
+ if (drift < min_drift) {
+ count_time = before;
+ vcpu->arch.count_bias += drift;
+ ret = drift;
+ goto resume;
+ }
+
+ /* Calculate expected count right now */
+ now = ktime_get();
+ now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+
+ /*
+ * Detect positive drift, where count is higher than expected, and
+ * adjust the bias to avoid guest time going backwards.
+ */
+ drift = count - now_count;
+ if (drift > 0) {
+ count_time = now;
+ vcpu->arch.count_bias += drift;
+ ret = drift;
+ goto resume;
+ }
+
+ /* Subtract nanosecond delta to find ktime when count was read */
+ delta = (u64)(u32)(now_count - count);
+ delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
+ count_time = ktime_sub_ns(now, delta);
+
+resume:
+ /* Resume using the calculated ktime */
+ kvm_mips_resume_hrtimer(vcpu, count_time, count);
+ return ret;
+}
+
+/**
* kvm_mips_write_count() - Modify the count and update timer.
* @vcpu: Virtual CPU.
* @count: Guest CP0_Count value to set.
@@ -543,16 +619,15 @@ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count)
/**
* kvm_mips_init_count() - Initialise timer.
* @vcpu: Virtual CPU.
+ * @count_hz: Frequency of timer.
*
- * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set
- * it going if it's enabled.
+ * Initialise the timer to the specified frequency, zero it, and set it going if
+ * it's enabled.
*/
-void kvm_mips_init_count(struct kvm_vcpu *vcpu)
+void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz)
{
- /* 100 MHz */
- vcpu->arch.count_hz = 100*1000*1000;
- vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32,
- vcpu->arch.count_hz);
+ vcpu->arch.count_hz = count_hz;
+ vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz);
vcpu->arch.count_dyn_bias = 0;
/* Starting at 0 */
@@ -622,7 +697,9 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
struct mips_coproc *cop0 = vcpu->arch.cop0;
int dc;
u32 old_compare = kvm_read_c0_guest_compare(cop0);
- ktime_t now;
+ s32 delta = compare - old_compare;
+ u32 cause;
+ ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */
u32 count;
/* if unchanged, must just be an ack */
@@ -634,6 +711,21 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
return;
}
+ /*
+ * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted
+ * too to prevent guest CP0_Count hitting guest CP0_Compare.
+ *
+ * The new GTOffset corresponds to the new value of CP0_Compare, and is
+ * set prior to it being written into the guest context. We disable
+ * preemption until the new value is written to prevent restore of a
+ * GTOffset corresponding to the old CP0_Compare value.
+ */
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta > 0) {
+ preempt_disable();
+ write_c0_gtoffset(compare - read_c0_count());
+ back_to_back_c0_hazard();
+ }
+
/* freeze_hrtimer() takes care of timer interrupts <= count */
dc = kvm_mips_count_disabled(vcpu);
if (!dc)
@@ -641,12 +733,36 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
if (ack)
kvm_mips_callbacks->dequeue_timer_int(vcpu);
+ else if (IS_ENABLED(CONFIG_KVM_MIPS_VZ))
+ /*
+ * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so
+ * preserve guest CP0_Cause.TI if we don't want to ack it.
+ */
+ cause = kvm_read_c0_guest_cause(cop0);
kvm_write_c0_guest_compare(cop0, compare);
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ if (delta > 0)
+ preempt_enable();
+
+ back_to_back_c0_hazard();
+
+ if (!ack && cause & CAUSEF_TI)
+ kvm_write_c0_guest_cause(cop0, cause);
+ }
+
/* resume_hrtimer() takes care of timer interrupts > count */
if (!dc)
kvm_mips_resume_hrtimer(vcpu, now, count);
+
+ /*
+ * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change
+ * until after the new CP0_Compare is written, otherwise new guest
+ * CP0_Count could hit new guest CP0_Compare.
+ */
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta <= 0)
+ write_c0_gtoffset(compare - read_c0_count());
}
/**
@@ -857,6 +973,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
++vcpu->stat.wait_exits;
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT);
if (!vcpu->arch.pending_exceptions) {
+ kvm_vz_lose_htimer(vcpu);
vcpu->arch.wait = 1;
kvm_vcpu_block(vcpu);
@@ -873,17 +990,62 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
-/*
- * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that
- * we can catch this, if things ever change
- */
+static void kvm_mips_change_entryhi(struct kvm_vcpu *vcpu,
+ unsigned long entryhi)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
+ int cpu, i;
+ u32 nasid = entryhi & KVM_ENTRYHI_ASID;
+
+ if (((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) {
+ trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) &
+ KVM_ENTRYHI_ASID, nasid);
+
+ /*
+ * Flush entries from the GVA page tables.
+ * Guest user page table will get flushed lazily on re-entry to
+ * guest user if the guest ASID actually changes.
+ */
+ kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_KERN);
+
+ /*
+ * Regenerate/invalidate kernel MMU context.
+ * The user MMU context will be regenerated lazily on re-entry
+ * to guest user if the guest ASID actually changes.
+ */
+ preempt_disable();
+ cpu = smp_processor_id();
+ get_new_mmu_context(kern_mm, cpu);
+ for_each_possible_cpu(i)
+ if (i != cpu)
+ cpu_context(i, kern_mm) = 0;
+ preempt_enable();
+ }
+ kvm_write_c0_guest_entryhi(cop0, entryhi);
+}
+
enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_mips_tlb *tlb;
unsigned long pc = vcpu->arch.pc;
+ int index;
- kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0));
- return EMULATE_FAIL;
+ index = kvm_read_c0_guest_index(cop0);
+ if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) {
+ /* UNDEFINED */
+ kvm_debug("[%#lx] TLBR Index %#x out of range\n", pc, index);
+ index &= KVM_MIPS_GUEST_TLB_SIZE - 1;
+ }
+
+ tlb = &vcpu->arch.guest_tlb[index];
+ kvm_write_c0_guest_pagemask(cop0, tlb->tlb_mask);
+ kvm_write_c0_guest_entrylo0(cop0, tlb->tlb_lo[0]);
+ kvm_write_c0_guest_entrylo1(cop0, tlb->tlb_lo[1]);
+ kvm_mips_change_entryhi(vcpu, tlb->tlb_hi);
+
+ return EMULATE_DONE;
}
/**
@@ -1105,11 +1267,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
- struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
enum emulation_result er = EMULATE_DONE;
u32 rt, rd, sel;
unsigned long curr_pc;
- int cpu, i;
/*
* Update PC and hold onto current PC in case there is
@@ -1143,6 +1303,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
case wait_op:
er = kvm_mips_emul_wait(vcpu);
break;
+ case hypcall_op:
+ er = kvm_mips_emul_hypcall(vcpu, inst);
+ break;
}
} else {
rt = inst.c0r_format.rt;
@@ -1208,44 +1371,8 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
kvm_change_c0_guest_ebase(cop0, 0x1ffff000,
vcpu->arch.gprs[rt]);
} else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
- u32 nasid =
- vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID;
- if (((kvm_read_c0_guest_entryhi(cop0) &
- KVM_ENTRYHI_ASID) != nasid)) {
- trace_kvm_asid_change(vcpu,
- kvm_read_c0_guest_entryhi(cop0)
- & KVM_ENTRYHI_ASID,
- nasid);
-
- /*
- * Flush entries from the GVA page
- * tables.
- * Guest user page table will get
- * flushed lazily on re-entry to guest
- * user if the guest ASID actually
- * changes.
- */
- kvm_mips_flush_gva_pt(kern_mm->pgd,
- KMF_KERN);
-
- /*
- * Regenerate/invalidate kernel MMU
- * context.
- * The user MMU context will be
- * regenerated lazily on re-entry to
- * guest user if the guest ASID actually
- * changes.
- */
- preempt_disable();
- cpu = smp_processor_id();
- get_new_mmu_context(kern_mm, cpu);
- for_each_possible_cpu(i)
- if (i != cpu)
- cpu_context(i, kern_mm) = 0;
- preempt_enable();
- }
- kvm_write_c0_guest_entryhi(cop0,
- vcpu->arch.gprs[rt]);
+ kvm_mips_change_entryhi(vcpu,
+ vcpu->arch.gprs[rt]);
}
/* Are we writing to COUNT */
else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
@@ -1474,9 +1601,8 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
- enum emulation_result er = EMULATE_DO_MMIO;
+ enum emulation_result er;
u32 rt;
- u32 bytes;
void *data = run->mmio.data;
unsigned long curr_pc;
@@ -1491,103 +1617,74 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
rt = inst.i_format.rt;
+ run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+ vcpu->arch.host_cp0_badvaddr);
+ if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+ goto out_fail;
+
switch (inst.i_format.opcode) {
- case sb_op:
- bytes = 1;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.len = bytes;
- run->mmio.is_write = 1;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 1;
- *(u8 *) data = vcpu->arch.gprs[rt];
- kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
- vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt],
- *(u8 *) data);
+#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ)
+ case sd_op:
+ run->mmio.len = 8;
+ *(u64 *)data = vcpu->arch.gprs[rt];
+ kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n",
+ vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+ vcpu->arch.gprs[rt], *(u64 *)data);
break;
+#endif
case sw_op:
- bytes = 4;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 1;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 1;
- *(u32 *) data = vcpu->arch.gprs[rt];
+ run->mmio.len = 4;
+ *(u32 *)data = vcpu->arch.gprs[rt];
kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n",
vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
- vcpu->arch.gprs[rt], *(u32 *) data);
+ vcpu->arch.gprs[rt], *(u32 *)data);
break;
case sh_op:
- bytes = 2;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 1;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 1;
- *(u16 *) data = vcpu->arch.gprs[rt];
+ run->mmio.len = 2;
+ *(u16 *)data = vcpu->arch.gprs[rt];
kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n",
vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
- vcpu->arch.gprs[rt], *(u32 *) data);
+ vcpu->arch.gprs[rt], *(u16 *)data);
+ break;
+
+ case sb_op:
+ run->mmio.len = 1;
+ *(u8 *)data = vcpu->arch.gprs[rt];
+
+ kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+ vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+ vcpu->arch.gprs[rt], *(u8 *)data);
break;
default:
kvm_err("Store not yet supported (inst=0x%08x)\n",
inst.word);
- er = EMULATE_FAIL;
- break;
+ goto out_fail;
}
- /* Rollback PC if emulation was unsuccessful */
- if (er == EMULATE_FAIL)
- vcpu->arch.pc = curr_pc;
+ run->mmio.is_write = 1;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_is_write = 1;
+ return EMULATE_DO_MMIO;
- return er;
+out_fail:
+ /* Rollback PC if emulation was unsuccessful */
+ vcpu->arch.pc = curr_pc;
+ return EMULATE_FAIL;
}
enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
u32 cause, struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
- enum emulation_result er = EMULATE_DO_MMIO;
+ enum emulation_result er;
unsigned long curr_pc;
u32 op, rt;
- u32 bytes;
rt = inst.i_format.rt;
op = inst.i_format.opcode;
@@ -1606,96 +1703,53 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
vcpu->arch.io_gpr = rt;
+ run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+ vcpu->arch.host_cp0_badvaddr);
+ if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+ return EMULATE_FAIL;
+
+ vcpu->mmio_needed = 2; /* signed */
switch (op) {
- case lw_op:
- bytes = 4;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
+#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ)
+ case ld_op:
+ run->mmio.len = 8;
+ break;
- run->mmio.len = bytes;
- run->mmio.is_write = 0;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 0;
+ case lwu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ /* fall through */
+#endif
+ case lw_op:
+ run->mmio.len = 4;
break;
- case lh_op:
case lhu_op:
- bytes = 2;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 0;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 0;
-
- if (op == lh_op)
- vcpu->mmio_needed = 2;
- else
- vcpu->mmio_needed = 1;
-
+ vcpu->mmio_needed = 1; /* unsigned */
+ /* fall through */
+ case lh_op:
+ run->mmio.len = 2;
break;
case lbu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ /* fall through */
case lb_op:
- bytes = 1;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 0;
- vcpu->mmio_is_write = 0;
-
- if (op == lb_op)
- vcpu->mmio_needed = 2;
- else
- vcpu->mmio_needed = 1;
-
+ run->mmio.len = 1;
break;
default:
kvm_err("Load not yet supported (inst=0x%08x)\n",
inst.word);
- er = EMULATE_FAIL;
- break;
+ vcpu->mmio_needed = 0;
+ return EMULATE_FAIL;
}
- return er;
+ run->mmio.is_write = 0;
+ vcpu->mmio_is_write = 0;
+ return EMULATE_DO_MMIO;
}
+#ifndef CONFIG_KVM_MIPS_VZ
static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
unsigned long curr_pc,
unsigned long addr,
@@ -1786,11 +1840,35 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
arch->gprs[base], offset);
- if (cache == Cache_D)
+ if (cache == Cache_D) {
+#ifdef CONFIG_CPU_R4K_CACHE_TLB
r4k_blast_dcache();
- else if (cache == Cache_I)
+#else
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* locally flush icache */
+ local_flush_icache_range(0, 0);
+ break;
+ default:
+ __flush_cache_all();
+ break;
+ }
+#endif
+ } else if (cache == Cache_I) {
+#ifdef CONFIG_CPU_R4K_CACHE_TLB
r4k_blast_icache();
- else {
+#else
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* locally flush icache */
+ local_flush_icache_range(0, 0);
+ break;
+ default:
+ flush_icache_all();
+ break;
+ }
+#endif
+ } else {
kvm_err("%s: unsupported CACHE INDEX operation\n",
__func__);
return EMULATE_FAIL;
@@ -1870,18 +1948,6 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
case cop0_op:
er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu);
break;
- case sb_op:
- case sh_op:
- case sw_op:
- er = kvm_mips_emulate_store(inst, cause, run, vcpu);
- break;
- case lb_op:
- case lbu_op:
- case lhu_op:
- case lh_op:
- case lw_op:
- er = kvm_mips_emulate_load(inst, cause, run, vcpu);
- break;
#ifndef CONFIG_CPU_MIPSR6
case cache_op:
@@ -1915,6 +1981,7 @@ unknown:
return er;
}
+#endif /* CONFIG_KVM_MIPS_VZ */
/**
* kvm_mips_guest_exception_base() - Find guest exception vector base address.
@@ -2524,8 +2591,15 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
vcpu->arch.pc = vcpu->arch.io_pc;
switch (run->mmio.len) {
+ case 8:
+ *gpr = *(s64 *)run->mmio.data;
+ break;
+
case 4:
- *gpr = *(s32 *) run->mmio.data;
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s32 *)run->mmio.data;
+ else
+ *gpr = *(u32 *)run->mmio.data;
break;
case 2:
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c
index c5b254c4d0da..16e1c93b484f 100644
--- a/arch/mips/kvm/entry.c
+++ b/arch/mips/kvm/entry.c
@@ -51,12 +51,15 @@
#define RA 31
/* Some CP0 registers */
+#define C0_PWBASE 5, 5
#define C0_HWRENA 7, 0
#define C0_BADVADDR 8, 0
#define C0_BADINSTR 8, 1
#define C0_BADINSTRP 8, 2
#define C0_ENTRYHI 10, 0
+#define C0_GUESTCTL1 10, 4
#define C0_STATUS 12, 0
+#define C0_GUESTCTL0 12, 6
#define C0_CAUSE 13, 0
#define C0_EPC 14, 0
#define C0_EBASE 15, 1
@@ -292,8 +295,8 @@ static void *kvm_mips_build_enter_guest(void *addr)
unsigned int i;
struct uasm_label labels[2];
struct uasm_reloc relocs[2];
- struct uasm_label *l = labels;
- struct uasm_reloc *r = relocs;
+ struct uasm_label __maybe_unused *l = labels;
+ struct uasm_reloc __maybe_unused *r = relocs;
memset(labels, 0, sizeof(labels));
memset(relocs, 0, sizeof(relocs));
@@ -302,7 +305,67 @@ static void *kvm_mips_build_enter_guest(void *addr)
UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1);
UASM_i_MTC0(&p, T0, C0_EPC);
- /* Set the ASID for the Guest Kernel */
+#ifdef CONFIG_KVM_MIPS_VZ
+ /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */
+ UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg);
+ UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1);
+
+ /*
+ * Set up KVM GPA pgd.
+ * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
+ * - call tlbmiss_handler_setup_pgd(mm->pgd)
+ * - write mm->pgd into CP0_PWBase
+ *
+ * We keep S0 pointing at struct kvm so we can load the ASID below.
+ */
+ UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) -
+ (int)offsetof(struct kvm_vcpu, arch), K1);
+ UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0);
+ UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
+ uasm_i_jalr(&p, RA, T9);
+ /* delay slot */
+ if (cpu_has_htw)
+ UASM_i_MTC0(&p, A0, C0_PWBASE);
+ else
+ uasm_i_nop(&p);
+
+ /* Set GM bit to setup eret to VZ guest context */
+ uasm_i_addiu(&p, V1, ZERO, 1);
+ uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
+ uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1);
+ uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
+
+ if (cpu_has_guestid) {
+ /*
+ * Set root mode GuestID, so that root TLB refill handler can
+ * use the correct GuestID in the root TLB.
+ */
+
+ /* Get current GuestID */
+ uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
+ /* Set GuestCtl1.RID = GuestCtl1.ID */
+ uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT,
+ MIPS_GCTL1_ID_WIDTH);
+ uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT,
+ MIPS_GCTL1_RID_WIDTH);
+ uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
+
+ /* GuestID handles dealiasing so we don't need to touch ASID */
+ goto skip_asid_restore;
+ }
+
+ /* Root ASID Dealias (RAD) */
+
+ /* Save host ASID */
+ UASM_i_MFC0(&p, K0, C0_ENTRYHI);
+ UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
+ K1);
+
+ /* Set the root ASID for the Guest */
+ UASM_i_ADDIU(&p, T1, S0,
+ offsetof(struct kvm, arch.gpa_mm.context.asid));
+#else
+ /* Set the ASID for the Guest Kernel or User */
UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1);
UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]),
T0);
@@ -315,6 +378,7 @@ static void *kvm_mips_build_enter_guest(void *addr)
UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
guest_user_mm.context.asid));
uasm_l_kernel_asid(&l, p);
+#endif
/* t1: contains the base of the ASID array, need to get the cpu id */
/* smp_processor_id */
@@ -339,6 +403,7 @@ static void *kvm_mips_build_enter_guest(void *addr)
uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
#endif
+#ifndef CONFIG_KVM_MIPS_VZ
/*
* Set up KVM T&E GVA pgd.
* This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
@@ -351,7 +416,11 @@ static void *kvm_mips_build_enter_guest(void *addr)
UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
uasm_i_jalr(&p, RA, T9);
uasm_i_mtc0(&p, K0, C0_ENTRYHI);
-
+#else
+ /* Set up KVM VZ root ASID (!guestid) */
+ uasm_i_mtc0(&p, K0, C0_ENTRYHI);
+skip_asid_restore:
+#endif
uasm_i_ehb(&p);
/* Disable RDHWR access */
@@ -559,13 +628,10 @@ void *kvm_mips_build_exit(void *addr)
/* Now that context has been saved, we can use other registers */
/* Restore vcpu */
- UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
- uasm_i_move(&p, S1, A1);
+ UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
/* Restore run (vcpu->run) */
- UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1);
- /* Save pointer to run in s0, will be saved by the compiler */
- uasm_i_move(&p, S0, A0);
+ UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1);
/*
* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
@@ -641,6 +707,52 @@ void *kvm_mips_build_exit(void *addr)
uasm_l_msa_1(&l, p);
}
+#ifdef CONFIG_KVM_MIPS_VZ
+ /* Restore host ASID */
+ if (!cpu_has_guestid) {
+ UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
+ K1);
+ UASM_i_MTC0(&p, K0, C0_ENTRYHI);
+ }
+
+ /*
+ * Set up normal Linux process pgd.
+ * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
+ * - call tlbmiss_handler_setup_pgd(mm->pgd)
+ * - write mm->pgd into CP0_PWBase
+ */
+ UASM_i_LW(&p, A0,
+ offsetof(struct kvm_vcpu_arch, host_pgd), K1);
+ UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
+ uasm_i_jalr(&p, RA, T9);
+ /* delay slot */
+ if (cpu_has_htw)
+ UASM_i_MTC0(&p, A0, C0_PWBASE);
+ else
+ uasm_i_nop(&p);
+
+ /* Clear GM bit so we don't enter guest mode when EXL is cleared */
+ uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
+ uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1);
+ uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
+
+ /* Save GuestCtl0 so we can access GExcCode after CPU migration */
+ uasm_i_sw(&p, K0,
+ offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1);
+
+ if (cpu_has_guestid) {
+ /*
+ * Clear root mode GuestID, so that root TLB operations use the
+ * root GuestID in the root TLB.
+ */
+ uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
+ /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */
+ uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT,
+ MIPS_GCTL1_RID_WIDTH);
+ uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
+ }
+#endif
+
/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
uasm_i_and(&p, V0, V0, AT);
@@ -680,6 +792,8 @@ void *kvm_mips_build_exit(void *addr)
* Now jump to the kvm_mips_handle_exit() to see if we can deal
* with this in the kernel
*/
+ uasm_i_move(&p, A0, S0);
+ uasm_i_move(&p, A1, S1);
UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
uasm_i_jalr(&p, RA, T9);
UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c
new file mode 100644
index 000000000000..83063435195f
--- /dev/null
+++ b/arch/mips/kvm/hypcall.c
@@ -0,0 +1,53 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * KVM/MIPS: Hypercall handling.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm_para.h>
+
+#define MAX_HYPCALL_ARGS 4
+
+enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu,
+ union mips_instruction inst)
+{
+ unsigned int code = (inst.co_format.code >> 5) & 0x3ff;
+
+ kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code);
+
+ switch (code) {
+ case 0:
+ return EMULATE_HYPERCALL;
+ default:
+ return EMULATE_FAIL;
+ };
+}
+
+static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num,
+ const unsigned long *args, unsigned long *hret)
+{
+ /* Report unimplemented hypercall to guest */
+ *hret = -KVM_ENOSYS;
+ return RESUME_GUEST;
+}
+
+int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long num, args[MAX_HYPCALL_ARGS];
+
+ /* read hypcall number and arguments */
+ num = vcpu->arch.gprs[2]; /* v0 */
+ args[0] = vcpu->arch.gprs[4]; /* a0 */
+ args[1] = vcpu->arch.gprs[5]; /* a1 */
+ args[2] = vcpu->arch.gprs[6]; /* a2 */
+ args[3] = vcpu->arch.gprs[7]; /* a3 */
+
+ return kvm_mips_hypercall(vcpu, num,
+ args, &vcpu->arch.gprs[2] /* v0 */);
+}
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
index fb118a2c8379..3bf0a49725e8 100644
--- a/arch/mips/kvm/interrupt.h
+++ b/arch/mips/kvm/interrupt.h
@@ -30,8 +30,13 @@
#define C_TI (_ULCAST_(1) << 30)
+#ifdef CONFIG_KVM_MIPS_VZ
+#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (1)
+#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (1)
+#else
#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0)
#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0)
+#endif
void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 15a1b1716c2e..d4b2ad18eef2 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -59,6 +59,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU },
{ "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
+#ifdef CONFIG_KVM_MIPS_VZ
+ { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU },
+ { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU },
+ { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU },
+ { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU },
+ { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU },
+ { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU },
+ { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU },
+ { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU },
+#endif
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU },
@@ -66,6 +76,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{NULL}
};
+bool kvm_trace_guest_mode_change;
+
+int kvm_guest_mode_change_trace_reg(void)
+{
+ kvm_trace_guest_mode_change = 1;
+ return 0;
+}
+
+void kvm_guest_mode_change_trace_unreg(void)
+{
+ kvm_trace_guest_mode_change = 0;
+}
+
/*
* XXXKYMA: We are simulatoring a processor that has the WII bit set in
* Config7, so we are "runnable" if interrupts are pending
@@ -82,7 +105,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_hardware_enable(void)
{
- return 0;
+ return kvm_mips_callbacks->hardware_enable();
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ kvm_mips_callbacks->hardware_disable();
}
int kvm_arch_hardware_setup(void)
@@ -97,6 +125,18 @@ void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ switch (type) {
+#ifdef CONFIG_KVM_MIPS_VZ
+ case KVM_VM_MIPS_VZ:
+#else
+ case KVM_VM_MIPS_TE:
+#endif
+ break;
+ default:
+ /* Unsupported KVM type */
+ return -EINVAL;
+ };
+
/* Allocate page table to map GPA -> RPA */
kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
if (!kvm->arch.gpa_mm.pgd)
@@ -301,8 +341,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
/* Build guest exception vectors dynamically in unmapped memory */
handler = gebase + 0x2000;
- /* TLB refill */
+ /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */
refill_start = gebase;
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && IS_ENABLED(CONFIG_64BIT))
+ refill_start += 0x080;
refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler);
/* General Exception Entry point */
@@ -353,9 +395,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
/* Init */
vcpu->arch.last_sched_cpu = -1;
-
- /* Start off the timer */
- kvm_mips_init_count(vcpu);
+ vcpu->arch.last_exec_cpu = -1;
return vcpu;
@@ -1030,9 +1070,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_NR_VCPUS:
r = num_online_cpus();
break;
@@ -1059,7 +1096,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
break;
default:
- r = 0;
+ r = kvm_mips_callbacks->check_extension(kvm, ext);
break;
}
return r;
@@ -1067,7 +1104,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_mips_pending_timer(vcpu);
+ return kvm_mips_pending_timer(vcpu) ||
+ kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI;
}
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
@@ -1092,7 +1130,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo);
cop0 = vcpu->arch.cop0;
- kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n",
+ kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n",
kvm_read_c0_guest_status(cop0),
kvm_read_c0_guest_cause(cop0));
@@ -1208,7 +1246,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
/* re-enable HTW before enabling interrupts */
- htw_start();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ))
+ htw_start();
/* Set a default exit reason */
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -1226,17 +1265,20 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
cause, opc, run, vcpu);
trace_kvm_exit(vcpu, exccode);
- /*
- * Do a privilege check, if in UM most of these exit conditions end up
- * causing an exception to be delivered to the Guest Kernel
- */
- er = kvm_mips_check_privilege(cause, opc, run, vcpu);
- if (er == EMULATE_PRIV_FAIL) {
- goto skip_emul;
- } else if (er == EMULATE_FAIL) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- ret = RESUME_HOST;
- goto skip_emul;
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ /*
+ * Do a privilege check, if in UM most of these exit conditions
+ * end up causing an exception to be delivered to the Guest
+ * Kernel
+ */
+ er = kvm_mips_check_privilege(cause, opc, run, vcpu);
+ if (er == EMULATE_PRIV_FAIL) {
+ goto skip_emul;
+ } else if (er == EMULATE_FAIL) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ goto skip_emul;
+ }
}
switch (exccode) {
@@ -1267,7 +1309,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case EXCCODE_TLBS:
- kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
+ kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n",
cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
badvaddr);
@@ -1328,12 +1370,17 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
break;
+ case EXCCODE_GE:
+ /* defer exit accounting to handler */
+ ret = kvm_mips_callbacks->handle_guest_exit(vcpu);
+ break;
+
default:
if (cause & CAUSEF_BD)
opc += 1;
inst = 0;
kvm_get_badinstr(opc, vcpu, &inst);
- kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
+ kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
exccode, opc, inst, badvaddr,
kvm_read_c0_guest_status(vcpu->arch.cop0));
kvm_arch_vcpu_dump_regs(vcpu);
@@ -1346,6 +1393,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
skip_emul:
local_irq_disable();
+ if (ret == RESUME_GUEST)
+ kvm_vz_acquire_htimer(vcpu);
+
if (er == EMULATE_DONE && !(ret & RESUME_HOST))
kvm_mips_deliver_interrupts(vcpu, cause);
@@ -1391,7 +1441,8 @@ skip_emul:
}
/* Disable HTW before returning to guest or host */
- htw_stop();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ))
+ htw_stop();
return ret;
}
@@ -1527,16 +1578,18 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu)
void kvm_lose_fpu(struct kvm_vcpu *vcpu)
{
/*
- * FPU & MSA get disabled in root context (hardware) when it is disabled
- * in guest context (software), but the register state in the hardware
- * may still be in use. This is why we explicitly re-enable the hardware
- * before saving.
+ * With T&E, FPU & MSA get disabled in root context (hardware) when it
+ * is disabled in guest context (software), but the register state in
+ * the hardware may still be in use.
+ * This is why we explicitly re-enable the hardware before saving.
*/
preempt_disable();
if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
- set_c0_config5(MIPS_CONF5_MSAEN);
- enable_fpu_hazard();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ set_c0_config5(MIPS_CONF5_MSAEN);
+ enable_fpu_hazard();
+ }
__kvm_save_msa(&vcpu->arch);
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA);
@@ -1549,8 +1602,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
}
vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA);
} else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) {
- set_c0_status(ST0_CU1);
- enable_fpu_hazard();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ set_c0_status(ST0_CU1);
+ enable_fpu_hazard();
+ }
__kvm_save_fpu(&vcpu->arch);
vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU;
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index cb0faade311e..ee64db032793 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -992,6 +992,22 @@ static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
return kvm_mips_gpa_pte_to_gva_unmapped(pte);
}
+#ifdef CONFIG_KVM_MIPS_VZ
+int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
+ struct kvm_vcpu *vcpu,
+ bool write_fault)
+{
+ int ret;
+
+ ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
+ if (ret) /* non-zero from kvm_mips_map_page(): hand it back unchanged */
+ return ret;
+
+ /* Invalidate any root TLB entry for badvaddr via kvm_vz_host_tlb_inv() */
+ return kvm_vz_host_tlb_inv(vcpu, badvaddr);
+}
+#endif
+
/* XXXKYMA: Must be called with interrupts disabled */
int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu,
@@ -1225,6 +1241,10 @@ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
int err;
+ if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
+ "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
+ return -EINVAL;
+
retry:
kvm_trap_emul_gva_lockless_begin(vcpu);
err = get_user(*out, opc);
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 2819eb793345..7c6336dd2638 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -33,6 +33,25 @@
#define KVM_GUEST_PC_TLB 0
#define KVM_GUEST_SP_TLB 1
+#ifdef CONFIG_KVM_MIPS_VZ
+unsigned long GUESTID_MASK;
+EXPORT_SYMBOL_GPL(GUESTID_MASK);
+unsigned long GUESTID_FIRST_VERSION;
+EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION);
+unsigned long GUESTID_VERSION_MASK;
+EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK);
+
+static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu)
+{
+ struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm;
+
+ if (cpu_has_guestid) /* with GuestID support the ASID part is 0 */
+ return 0;
+ else /* otherwise use this CPU's ASID for the VM's GPA mm */
+ return cpu_asid(smp_processor_id(), gpa_mm);
+}
+#endif
+
static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
@@ -166,6 +185,13 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
local_irq_restore(flags);
+ /*
+ * We don't want to get reserved instruction exceptions for missing tlb
+ * entries.
+ */
+ if (cpu_has_vtag_icache)
+ flush_icache_all();
+
if (user && idx_user >= 0)
kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n",
__func__, (va & VPN2_MASK) |
@@ -179,6 +205,421 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
}
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
+#ifdef CONFIG_KVM_MIPS_VZ
+
+/* GuestID management */
+
+/**
+ * clear_root_gid() - Clear GuestCtl1.RID for normal root operation.
+ */
+static inline void clear_root_gid(void)
+{
+ if (cpu_has_guestid) { /* no-op on CPUs without GuestID support */
+ clear_c0_guestctl1(MIPS_GCTL1_RID);
+ mtc0_tlbw_hazard();
+ }
+}
+
+/**
+ * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID.
+ *
+ * Sets the root GuestID to match the current guest GuestID, for TLB operation
+ * on the GPA->RPA mappings in the root TLB.
+ *
+ * The caller must be sure to disable HTW while the root GID is set, and
+ * possibly longer if TLB registers are modified.
+ */
+static inline void set_root_gid_to_guest_gid(void)
+{
+ unsigned int guestctl1;
+
+ if (cpu_has_guestid) { /* nothing to do without GuestID support */
+ back_to_back_c0_hazard();
+ guestctl1 = read_c0_guestctl1();
+ guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) | /* drop the old RID field */
+ ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT)
+ << MIPS_GCTL1_RID_SHIFT; /* ID value moved into the RID position */
+ write_c0_guestctl1(guestctl1);
+ mtc0_tlbw_hazard();
+ }
+}
+
+int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
+{
+ int idx; /* result of the root TLB invalidate for @va, only used for logging */
+ unsigned long flags, old_entryhi;
+
+ local_irq_save(flags);
+ htw_stop();
+
+ /* Set root GuestID for root probe and write of guest TLB entry */
+ set_root_gid_to_guest_gid();
+
+ old_entryhi = read_c0_entryhi(); /* restored once the invalidate is done */
+
+ idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
+ kvm_mips_get_root_asid(vcpu));
+
+ write_c0_entryhi(old_entryhi);
+ clear_root_gid();
+ mtc0_tlbw_hazard();
+
+ htw_start();
+ local_irq_restore(flags);
+
+ /*
+ * We don't want to get reserved instruction exceptions for missing tlb
+ * entries.
+ */
+ if (cpu_has_vtag_icache)
+ flush_icache_all();
+
+ if (idx >= 0) /* index 0 is a valid entry too, as in kvm_mips_host_tlb_inv() */
+ kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n",
+ __func__, (va & VPN2_MASK) |
+ kvm_mips_get_root_asid(vcpu), idx);
+
+ return 0; /* always reports success, whether or not an entry matched */
+}
+EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv);
+
+/**
+ * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping.
+ * @vcpu: KVM VCPU pointer.
+ * @gva: Guest virtual address in a TLB mapped guest segment.
+ * @gpa: Pointer to output guest physical address it maps to.
+ *
+ * Converts a guest virtual address in a guest TLB mapped segment to a guest
+ * physical address, by probing the guest TLB.
+ *
+ * Returns: 0 if guest TLB mapping exists for @gva. *@gpa will have been
+ * written.
+ * -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not
+ * have been written.
+ */
+int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa)
+{
+ unsigned long o_entryhi, o_entrylo[2], o_pagemask;
+ unsigned int o_index;
+ unsigned long entrylo[2], pagemask, pagemaskbit, pa;
+ unsigned long flags;
+ int index;
+
+ /* Probe the guest TLB for a mapping */
+ local_irq_save(flags);
+ /* Set root GuestID for root probe of guest TLB entry */
+ htw_stop();
+ set_root_gid_to_guest_gid();
+
+ o_entryhi = read_gc0_entryhi();
+ o_index = read_gc0_index();
+
+ write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl));
+ mtc0_tlbw_hazard();
+ guest_tlb_probe();
+ tlb_probe_hazard();
+
+ index = read_gc0_index();
+ if (index < 0) {
+ /* No match, fail (only EntryHi and Index were clobbered so far) */
+ write_gc0_entryhi(o_entryhi);
+ write_gc0_index(o_index);
+
+ clear_root_gid();
+ htw_start();
+ local_irq_restore(flags);
+ return -EFAULT;
+ }
+
+ /* Match! read the TLB entry */
+ o_entrylo[0] = read_gc0_entrylo0();
+ o_entrylo[1] = read_gc0_entrylo1();
+ o_pagemask = read_gc0_pagemask();
+
+ mtc0_tlbr_hazard();
+ guest_tlb_read();
+ tlb_read_hazard();
+
+ entrylo[0] = read_gc0_entrylo0();
+ entrylo[1] = read_gc0_entrylo1();
+ pagemask = ~read_gc0_pagemask() & ~0x1fffl;
+
+ write_gc0_entryhi(o_entryhi);
+ write_gc0_index(o_index);
+ write_gc0_entrylo0(o_entrylo[0]);
+ write_gc0_entrylo1(o_entrylo[1]);
+ write_gc0_pagemask(o_pagemask);
+
+ clear_root_gid();
+ htw_start();
+ local_irq_restore(flags);
+
+ /* Select one of the EntryLo values and interpret the GPA */
+ pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1; /* lowest set bit, >> 1 */
+ pa = entrylo[!!(gva & pagemaskbit)];
+
+ /*
+ * TLB entry may have become invalid since TLB probe if physical FTLB
+ * entries are shared between threads (e.g. I6400).
+ */
+ if (!(pa & ENTRYLO_V))
+ return -EFAULT;
+
+ /*
+ * Note, this doesn't take guest MIPS32 XPA into account, where PFN is
+ * split with XI/RI in the middle.
+ */
+ pa = (pa << 6) & ~0xfffl;
+ pa |= gva & ~(pagemask | pagemaskbit); /* add the offset bits from gva */
+
+ *gpa = pa;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup);
+
+/**
+ * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for
+ * guests.
+ *
+ * Invalidate all entries in root tlb which are GPA mappings.
+ */
+void kvm_vz_local_flush_roottlb_all_guests(void)
+{
+ unsigned long flags;
+ unsigned long old_entryhi, old_pagemask, old_guestctl1;
+ int entry;
+
+ if (WARN_ON(!cpu_has_guestid)) /* the loop below tells entries apart by RID */
+ return;
+
+ local_irq_save(flags);
+ htw_stop();
+
+ /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */
+ old_entryhi = read_c0_entryhi();
+ old_pagemask = read_c0_pagemask();
+ old_guestctl1 = read_c0_guestctl1();
+
+ /*
+ * Invalidate guest entries in root TLB while leaving root entries
+ * intact when possible.
+ */
+ for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
+ write_c0_index(entry);
+ mtc0_tlbw_hazard();
+ tlb_read();
+ tlb_read_hazard();
+
+ /* Don't invalidate non-guest (RVA) mappings in the root TLB */
+ if (!(read_c0_guestctl1() & MIPS_GCTL1_RID))
+ continue;
+
+ /* Make sure all entries differ. */
+ write_c0_entryhi(UNIQUE_ENTRYHI(entry));
+ write_c0_entrylo0(0);
+ write_c0_entrylo1(0);
+ write_c0_guestctl1(0); /* whole register, RID included, is 0 for the write */
+ mtc0_tlbw_hazard();
+ tlb_write_indexed();
+ }
+
+ write_c0_entryhi(old_entryhi);
+ write_c0_pagemask(old_pagemask);
+ write_c0_guestctl1(old_guestctl1);
+ tlbw_use_hazard();
+
+ htw_start();
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests);
+
+/**
+ * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries.
+ *
+ * Invalidate all entries in guest tlb irrespective of guestid.
+ */
+void kvm_vz_local_flush_guesttlb_all(void)
+{
+ unsigned long flags;
+ unsigned long old_index;
+ unsigned long old_entryhi;
+ unsigned long old_entrylo[2];
+ unsigned long old_pagemask;
+ int entry;
+ u64 cvmmemctl2 = 0; /* stays 0 unless the Octeon III case below sets it */
+
+ local_irq_save(flags);
+
+ /* Preserve all clobbered guest registers */
+ old_index = read_gc0_index();
+ old_entryhi = read_gc0_entryhi();
+ old_entrylo[0] = read_gc0_entrylo0();
+ old_entrylo[1] = read_gc0_entrylo1();
+ old_pagemask = read_gc0_pagemask();
+
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Inhibit machine check due to multiple matching TLB entries */
+ cvmmemctl2 = read_c0_cvmmemctl2();
+ cvmmemctl2 |= CVMMEMCTL2_INHIBITTS;
+ write_c0_cvmmemctl2(cvmmemctl2);
+ break;
+ }
+
+ /* Invalidate guest entries in guest TLB */
+ write_gc0_entrylo0(0);
+ write_gc0_entrylo1(0);
+ write_gc0_pagemask(0);
+ for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) {
+ /* Make sure all entries differ. */
+ write_gc0_index(entry);
+ write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry));
+ mtc0_tlbw_hazard();
+ guest_tlb_write_indexed();
+ }
+
+ if (cvmmemctl2) { /* non-zero only if INHIBITTS was set above: undo it */
+ cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS;
+ write_c0_cvmmemctl2(cvmmemctl2);
+ }
+
+ write_gc0_index(old_index);
+ write_gc0_entryhi(old_entryhi);
+ write_gc0_entrylo0(old_entrylo[0]);
+ write_gc0_entrylo1(old_entrylo[1]);
+ write_gc0_pagemask(old_pagemask);
+ tlbw_use_hazard();
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all);
+
+/**
+ * kvm_vz_save_guesttlb() - Save a range of guest TLB entries.
+ * @buf: Buffer to write TLB entries into.
+ * @index: Start index.
+ * @count: Number of entries to save.
+ *
+ * Save a range of guest TLB entries. The caller must ensure interrupts are
+ * disabled.
+ */
+void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count)
+{
+ unsigned int end = index + count; /* first index that is not saved */
+ unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
+ unsigned int guestctl1 = 0;
+ int old_index, i;
+
+ /* Save registers we're about to clobber */
+ old_index = read_gc0_index();
+ old_entryhi = read_gc0_entryhi();
+ old_entrylo0 = read_gc0_entrylo0();
+ old_entrylo1 = read_gc0_entrylo1();
+ old_pagemask = read_gc0_pagemask();
+
+ /* Set root GuestID for root probe */
+ htw_stop();
+ set_root_gid_to_guest_gid();
+ if (cpu_has_guestid) /* reference RID to compare each entry against */
+ guestctl1 = read_c0_guestctl1();
+
+ /* Read each entry from guest TLB */
+ for (i = index; i < end; ++i, ++buf) {
+ write_gc0_index(i);
+
+ mtc0_tlbr_hazard();
+ guest_tlb_read();
+ tlb_read_hazard();
+
+ if (cpu_has_guestid &&
+ (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) {
+ /* Entry invalid or belongs to another guest */
+ buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
+ buf->tlb_lo[0] = 0;
+ buf->tlb_lo[1] = 0;
+ buf->tlb_mask = 0;
+ } else {
+ /* Entry belongs to the right guest */
+ buf->tlb_hi = read_gc0_entryhi();
+ buf->tlb_lo[0] = read_gc0_entrylo0();
+ buf->tlb_lo[1] = read_gc0_entrylo1();
+ buf->tlb_mask = read_gc0_pagemask();
+ }
+ }
+
+ /* Clear root GuestID again */
+ clear_root_gid();
+ htw_start();
+
+ /* Restore clobbered registers */
+ write_gc0_index(old_index);
+ write_gc0_entryhi(old_entryhi);
+ write_gc0_entrylo0(old_entrylo0);
+ write_gc0_entrylo1(old_entrylo1);
+ write_gc0_pagemask(old_pagemask);
+
+ tlbw_use_hazard();
+}
+EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb);
+
+/**
+ * kvm_vz_load_guesttlb() - Load a range of guest TLB entries.
+ * @buf: Buffer to read TLB entries from.
+ * @index: Start index.
+ * @count: Number of entries to load.
+ *
+ * Load a range of guest TLB entries. The caller must ensure interrupts are
+ * disabled.
+ */
+void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count)
+{
+ unsigned int end = index + count; /* first index that is not loaded */
+ unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
+ int old_index, i;
+
+ /* Save registers we're about to clobber */
+ old_index = read_gc0_index();
+ old_entryhi = read_gc0_entryhi();
+ old_entrylo0 = read_gc0_entrylo0();
+ old_entrylo1 = read_gc0_entrylo1();
+ old_pagemask = read_gc0_pagemask();
+
+ /* Set root GuestID for root probe */
+ htw_stop();
+ set_root_gid_to_guest_gid();
+
+ /* Write each entry to guest TLB */
+ for (i = index; i < end; ++i, ++buf) {
+ write_gc0_index(i);
+ write_gc0_entryhi(buf->tlb_hi);
+ write_gc0_entrylo0(buf->tlb_lo[0]);
+ write_gc0_entrylo1(buf->tlb_lo[1]);
+ write_gc0_pagemask(buf->tlb_mask);
+
+ mtc0_tlbw_hazard();
+ guest_tlb_write_indexed();
+ }
+
+ /* Clear root GuestID again */
+ clear_root_gid();
+ htw_start();
+
+ /* Restore clobbered registers */
+ write_gc0_index(old_index);
+ write_gc0_entryhi(old_entryhi);
+ write_gc0_entrylo0(old_entrylo0);
+ write_gc0_entrylo1(old_entrylo1);
+ write_gc0_pagemask(old_pagemask);
+
+ tlbw_use_hazard();
+}
+EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb);
+
+#endif
+
/**
* kvm_mips_suspend_mm() - Suspend the active mm.
* @cpu The CPU we're running on.
diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h
index c858cf168078..a8c7fd7bf6d2 100644
--- a/arch/mips/kvm/trace.h
+++ b/arch/mips/kvm/trace.h
@@ -18,6 +18,13 @@
#define TRACE_INCLUDE_FILE trace
/*
+ * arch/mips/kvm/mips.c
+ */
+extern bool kvm_trace_guest_mode_change;
+int kvm_guest_mode_change_trace_reg(void);
+void kvm_guest_mode_change_trace_unreg(void);
+
+/*
* Tracepoints for VM enters
*/
DECLARE_EVENT_CLASS(kvm_transition,
@@ -62,10 +69,20 @@ DEFINE_EVENT(kvm_transition, kvm_out,
#define KVM_TRACE_EXIT_MSA_FPE 14
#define KVM_TRACE_EXIT_FPE 15
#define KVM_TRACE_EXIT_MSA_DISABLED 21
+#define KVM_TRACE_EXIT_GUEST_EXIT 27
/* Further exit reasons */
#define KVM_TRACE_EXIT_WAIT 32
#define KVM_TRACE_EXIT_CACHE 33
#define KVM_TRACE_EXIT_SIGNAL 34
+/* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */
+#define KVM_TRACE_EXIT_GEXCCODE_BASE 64
+#define KVM_TRACE_EXIT_GPSI 64 /* 0 */
+#define KVM_TRACE_EXIT_GSFC 65 /* 1 */
+#define KVM_TRACE_EXIT_HC 66 /* 2 */
+#define KVM_TRACE_EXIT_GRR 67 /* 3 */
+#define KVM_TRACE_EXIT_GVA 72 /* 8 */
+#define KVM_TRACE_EXIT_GHFC 73 /* 9 */
+#define KVM_TRACE_EXIT_GPA 74 /* 10 */
/* Tracepoints for VM exits */
#define kvm_trace_symbol_exit_types \
@@ -83,9 +100,17 @@ DEFINE_EVENT(kvm_transition, kvm_out,
{ KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \
{ KVM_TRACE_EXIT_FPE, "FPE" }, \
{ KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \
+ { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \
{ KVM_TRACE_EXIT_WAIT, "WAIT" }, \
{ KVM_TRACE_EXIT_CACHE, "CACHE" }, \
- { KVM_TRACE_EXIT_SIGNAL, "Signal" }
+ { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \
+ { KVM_TRACE_EXIT_GPSI, "GPSI" }, \
+ { KVM_TRACE_EXIT_GSFC, "GSFC" }, \
+ { KVM_TRACE_EXIT_HC, "HC" }, \
+ { KVM_TRACE_EXIT_GRR, "GRR" }, \
+ { KVM_TRACE_EXIT_GVA, "GVA" }, \
+ { KVM_TRACE_EXIT_GHFC, "GHFC" }, \
+ { KVM_TRACE_EXIT_GPA, "GPA" }
TRACE_EVENT(kvm_exit,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
@@ -158,6 +183,8 @@ TRACE_EVENT(kvm_exit,
{ KVM_TRACE_COP0(16, 4), "Config4" }, \
{ KVM_TRACE_COP0(16, 5), "Config5" }, \
{ KVM_TRACE_COP0(16, 7), "Config7" }, \
+ { KVM_TRACE_COP0(17, 1), "MAAR" }, \
+ { KVM_TRACE_COP0(17, 2), "MAARI" }, \
{ KVM_TRACE_COP0(26, 0), "ECC" }, \
{ KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \
{ KVM_TRACE_COP0(31, 2), "KScratch1" }, \
@@ -268,6 +295,51 @@ TRACE_EVENT(kvm_asid_change,
__entry->new_asid)
);
+TRACE_EVENT(kvm_guestid_change,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid),
+ TP_ARGS(vcpu, guestid), /* vcpu is accepted but not recorded below */
+ TP_STRUCT__entry(
+ __field(unsigned int, guestid)
+ ),
+
+ TP_fast_assign(
+ __entry->guestid = guestid;
+ ),
+
+ TP_printk("GuestID: 0x%02x",
+ __entry->guestid)
+);
+
+TRACE_EVENT_FN(kvm_guest_mode_change,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+ TP_STRUCT__entry(
+ __field(unsigned long, epc)
+ __field(unsigned long, pc)
+ __field(unsigned long, badvaddr)
+ __field(unsigned int, status)
+ __field(unsigned int, cause)
+ ),
+
+ TP_fast_assign( /* snapshot guest CP0 state and the VCPU's PC */
+ __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0);
+ __entry->pc = vcpu->arch.pc;
+ __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0);
+ __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0);
+ __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0);
+ ),
+
+ TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx",
+ __entry->epc,
+ __entry->pc,
+ __entry->status,
+ __entry->cause,
+ __entry->badvaddr),
+
+ kvm_guest_mode_change_trace_reg, /* sets kvm_trace_guest_mode_change */
+ kvm_guest_mode_change_trace_unreg /* clears kvm_trace_guest_mode_change */
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index b1fa53b252ea..a563759fd142 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
+#include <linux/log2.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/mmu_context.h>
@@ -40,6 +41,29 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
return gpa;
}
+static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu)
+{
+ u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
+ unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
+ u32 inst = 0; /* logged as 0 if kvm_get_badinstr() stores nothing */
+
+ /*
+ * Fetch the instruction (one word past PC when Cause.BD is set).
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ kvm_get_badinstr(opc, vcpu, &inst);
+
+ kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
+ exccode, opc, inst, badvaddr,
+ kvm_read_c0_guest_status(vcpu->arch.cop0));
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; /* report to userspace */
+ return RESUME_HOST;
+}
+
static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@@ -82,6 +106,10 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
ret = RESUME_HOST;
break;
+ case EMULATE_HYPERCALL:
+ ret = kvm_mips_handle_hypcall(vcpu);
+ break;
+
default:
BUG();
}
@@ -484,6 +512,31 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
return ret;
}
+static int kvm_trap_emul_hardware_enable(void)
+{
+ return 0; /* this callback does no hardware setup; always succeeds */
+}
+
+static void kvm_trap_emul_hardware_disable(void)
+{ /* intentionally empty: hardware_enable above set nothing up */
+}
+
+static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_MIPS_TE: /* the only capability this callback answers with 1 */
+ r = 1;
+ break;
+ default: /* every other ext value yields 0 */
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
@@ -561,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
u32 config, config1;
int vcpu_id = vcpu->vcpu_id;
+ /* Start off the timer at 100 MHz */
+ kvm_mips_init_count(vcpu, 100*1000*1000);
+
/*
* Arch specific stuff, set up config registers properly so that the
* guest will come up as expected
@@ -589,6 +645,13 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
/* Read the cache characteristics from the host Config1 Register */
config1 = (read_c0_config1() & ~0x7f);
+ /* DCache line size not correctly reported in Config1 on Octeon CPUs */
+ if (cpu_dcache_line_size()) {
+ config1 &= ~MIPS_CONF1_DL;
+ config1 |= ((ilog2(cpu_dcache_line_size()) - 1) <<
+ MIPS_CONF1_DL_SHF) & MIPS_CONF1_DL;
+ }
+
/* Set up MMU size */
config1 &= ~(0x3f << 25);
config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25);
@@ -892,10 +955,12 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
if (v & CAUSEF_DC) {
/* disable timer first */
kvm_mips_count_disable_cause(vcpu);
- kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+ kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC,
+ v);
} else {
/* enable timer last */
- kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+ kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC,
+ v);
kvm_mips_count_enable_cause(vcpu);
}
} else {
@@ -1230,7 +1295,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
.handle_fpe = kvm_trap_emul_handle_fpe,
.handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
+ .handle_guest_exit = kvm_trap_emul_no_handler,
+ .hardware_enable = kvm_trap_emul_hardware_enable,
+ .hardware_disable = kvm_trap_emul_hardware_disable,
+ .check_extension = kvm_trap_emul_check_extension,
.vcpu_init = kvm_trap_emul_vcpu_init,
.vcpu_uninit = kvm_trap_emul_vcpu_uninit,
.vcpu_setup = kvm_trap_emul_vcpu_setup,
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
new file mode 100644
index 000000000000..71d8856ade64
--- /dev/null
+++ b/arch/mips/kvm/vz.c
@@ -0,0 +1,3223 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * KVM/MIPS: Support for hardware virtualization extensions
+ *
+ * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
+ * Authors: Yann Le Du <[email protected]>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/cacheops.h>
+#include <asm/cmpxchg.h>
+#include <asm/fpu.h>
+#include <asm/hazards.h>
+#include <asm/inst.h>
+#include <asm/mmu_context.h>
+#include <asm/r4kcache.h>
+#include <asm/time.h>
+#include <asm/tlb.h>
+#include <asm/tlbex.h>
+
+#include <linux/kvm_host.h>
+
+#include "interrupt.h"
+
+#include "trace.h"
+
+/* Pointers to last VCPU loaded on each physical CPU */
+static struct kvm_vcpu *last_vcpu[NR_CPUS];
+/* Pointers to last VCPU executed on each physical CPU */
+static struct kvm_vcpu *last_exec_vcpu[NR_CPUS];
+
+/*
+ * Number of guest VTLB entries to use, so we can catch inconsistency between
+ * CPUs.
+ */
+static unsigned int kvm_vz_guest_vtlb_size;
+
+/*
+ * Read the guest CP0_EBase register. The 64-bit accessor is only used on a
+ * 64-bit kernel when cpu_has_ebase_wg is set; in every other case the plain
+ * accessor is used.
+ */
+static inline long kvm_vz_read_gc0_ebase(void)
+{
+	if (sizeof(long) != 8 || !cpu_has_ebase_wg)
+		return read_gc0_ebase();
+
+	return read_gc0_ebase_64();
+}
+
+/*
+ * Write the guest CP0_EBase register.
+ *
+ * The value is written twice: first with WG=1 so that the upper bits get
+ * written, then again unmodified in case WG should be left at 0.
+ * The 64-bit accessor is used on 64-bit kernels for R6 or when
+ * cpu_has_ebase_wg is set (write_gc0_ebase_64() is no longer UNDEFINED since
+ * R6).
+ */
+static inline void kvm_vz_write_gc0_ebase(long v)
+{
+	if (sizeof(long) != 8 ||
+	    !(cpu_has_mips64r6 || cpu_has_ebase_wg)) {
+		write_gc0_ebase(v | MIPS_EBASE_WG);
+		write_gc0_ebase(v);
+		return;
+	}
+
+	write_gc0_ebase_64(v | MIPS_EBASE_WG);
+	write_gc0_ebase_64(v);
+}
+
+/*
+ * These Config bits may be writable by the guest:
+ * Config: [K23, KU] (!TLB), K0
+ * Config1: (none)
+ * Config2: [TU, SU] (impl)
+ * Config3: ISAOnExc
+ * Config4: FTLBPageSize
+ * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR
+ */
+
+/* Mask of Config bits the guest may write: CONF_CM_CMASK only. @vcpu unused. */
+static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return CONF_CM_CMASK;
+}
+
+/* Mask of Config1 bits the guest may write: none. @vcpu is unused. */
+static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+/* Mask of Config2 bits the guest may write: none. @vcpu is unused. */
+static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+/* Mask of Config3 bits the guest may write: MIPS_CONF3_ISA_OE only. */
+static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return MIPS_CONF3_ISA_OE;
+}
+
+/*
+ * Mask of Config4 bits the guest may write: the whole
+ * MIPS_CONF4_VFTLBPAGESIZE field, without narrowing it further.
+ */
+static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ /* no need to be exact */
+ return MIPS_CONF4_VFTLBPAGESIZE;
+}
+
+/*
+ * Mask of Config5 bits the guest may write. K, CV and SBRI are always
+ * included; MSAEn, UFR, FRE and UFE depend on the guest's MSA/FPU state and
+ * on what the host CPU offers.
+ */
+static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int wrmask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI;
+
+	/* MSAEn may only change if MSA is supported and enabled */
+	if (kvm_mips_guest_has_msa(&vcpu->arch))
+		wrmask |= MIPS_CONF5_MSAEN;
+
+	/* No FPU mode bits are writable unless the guest FPU is enabled */
+	if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+		return wrmask;
+
+	/*
+	 * With the FPU enabled, allow mode changes for each feature that
+	 * exists according to the FIR register.
+	 */
+	if (cpu_has_ufr)
+		wrmask |= MIPS_CONF5_UFR;
+	if (cpu_has_fre)
+		wrmask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE;
+
+	return wrmask;
+}
+
+/*
+ * VZ optionally allows these additional Config bits to be written by root:
+ * Config: M, [MT]
+ * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP
+ * Config2: M
+ * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC,
+ * VInt, SP, CDMM, MT, SM, TL]
+ * Config4: M, [VTLBSizeExt, MMUSizeExt]
+ * Config5: MRP
+ */
+
+/* User-writable Config bits: the guest-writable bits plus MIPS_CONF_M. */
+static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M;
+}
+
+/*
+ * User-writable Config1 bits: the guest-writable bits plus MIPS_CONF_M, and
+ * additionally FP when the guest is allowed to have an FPU.
+ */
+static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int wrmask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M;
+
+	/* FPU presence is only configurable if an FPU is supported */
+	if (!kvm_mips_guest_can_have_fpu(&vcpu->arch))
+		return wrmask;
+
+	return wrmask | MIPS_CONF1_FP;
+}
+
+/* User-writable Config2 bits: the guest-writable bits plus MIPS_CONF_M. */
+static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M;
+}
+
+/*
+ * User-writable Config3 bits: the guest-writable bits plus M, ULRI and CTXTC,
+ * and additionally MSA when the guest is allowed to have MSA.
+ */
+static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int wrmask = kvm_vz_config3_guest_wrmask(vcpu);
+
+	wrmask |= MIPS_CONF_M | MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC;
+
+	/* MSA presence is only configurable if MSA is supported */
+	if (!kvm_mips_guest_can_have_msa(&vcpu->arch))
+		return wrmask;
+
+	return wrmask | MIPS_CONF3_MSA;
+}
+
+/* User-writable Config4 bits: the guest-writable bits plus MIPS_CONF_M. */
+static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M;
+}
+
+/* User-writable Config5 bits: the guest-writable bits plus MIPS_CONF5_MRP. */
+static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP;
+}
+
+/* GVA to GPA callback: returns @gva unchanged. */
+static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva)
+{
+ /* VZ guest has already converted gva to gpa */
+ return gva;
+}
+
+/*
+ * Flag interrupt @priority as pending delivery for @vcpu, and drop any
+ * outstanding request to clear that same interrupt.
+ */
+static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+
+	set_bit(priority, &arch->pending_exceptions);
+	clear_bit(priority, &arch->pending_exceptions_clr);
+}
+
+/*
+ * Withdraw a pending delivery of interrupt @priority for @vcpu, and flag the
+ * interrupt as needing to be cleared instead.
+ */
+static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+
+	clear_bit(priority, &arch->pending_exceptions);
+	set_bit(priority, &arch->pending_exceptions_clr);
+}
+
+/* Queue the guest timer interrupt (MIPS_EXC_INT_TIMER) for later delivery. */
+static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu)
+{
+ /*
+ * timer expiry is asynchronous to vcpu execution therefore defer guest
+ * cp0 accesses
+ */
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
+/* Dequeue the guest timer interrupt (MIPS_EXC_INT_TIMER) and mark it to be cleared. */
+static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu)
+{
+ /*
+ * timer expiry is asynchronous to vcpu execution therefore defer guest
+ * cp0 accesses
+ */
+ kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
+/*
+ * Queue an I/O or IPI interrupt requested through @irq.
+ *
+ * irq numbers 2, 3 and 4 select MIPS_EXC_INT_IO, MIPS_EXC_INT_IPI_1 and
+ * MIPS_EXC_INT_IPI_2 respectively; any other number is silently ignored.
+ */
+static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu,
+				   struct kvm_mips_interrupt *irq)
+{
+	unsigned int priority;
+
+	switch ((int)irq->irq) {
+	case 2:
+		priority = MIPS_EXC_INT_IO;
+		break;
+	case 3:
+		priority = MIPS_EXC_INT_IPI_1;
+		break;
+	case 4:
+		priority = MIPS_EXC_INT_IPI_2;
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * interrupts are asynchronous to vcpu execution therefore defer guest
+	 * cp0 accesses
+	 */
+	kvm_vz_queue_irq(vcpu, priority);
+}
+
+/*
+ * Dequeue an I/O or IPI interrupt requested through @irq.
+ *
+ * irq numbers -2, -3 and -4 select MIPS_EXC_INT_IO, MIPS_EXC_INT_IPI_1 and
+ * MIPS_EXC_INT_IPI_2 respectively; any other number is silently ignored.
+ */
+static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
+				     struct kvm_mips_interrupt *irq)
+{
+	unsigned int priority;
+
+	switch ((int)irq->irq) {
+	case -2:
+		priority = MIPS_EXC_INT_IO;
+		break;
+	case -3:
+		priority = MIPS_EXC_INT_IPI_1;
+		break;
+	case -4:
+		priority = MIPS_EXC_INT_IPI_2;
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * interrupts are asynchronous to vcpu execution therefore defer guest
+	 * cp0 accesses
+	 */
+	kvm_vz_dequeue_irq(vcpu, priority);
+}
+
+/*
+ * Interrupt bit (C_IRQn) associated with each KVM interrupt priority, indexed
+ * by priority. Priorities without an explicit entry map to 0.
+ */
+static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = {
+ [MIPS_EXC_INT_TIMER] = C_IRQ5,
+ [MIPS_EXC_INT_IO] = C_IRQ0,
+ [MIPS_EXC_INT_IPI_1] = C_IRQ1,
+ [MIPS_EXC_INT_IPI_2] = C_IRQ2,
+};
+
+/*
+ * Deliver the queued interrupt @priority to the guest.
+ *
+ * The timer interrupt sets C_TI in the guest Cause register. I/O and IPI
+ * interrupts set their irq bit in GuestCtl2 when cpu_has_guestctl2, otherwise
+ * in the guest Cause register. Other priorities touch no register. The pending
+ * bit for @priority is cleared in every case. @cause is unused.
+ *
+ * Returns: always 1.
+ */
+static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
+				 u32 cause)
+{
+	u32 irq = 0;
+
+	if (priority < MIPS_EXC_MAX)
+		irq = kvm_vz_priority_to_irq[priority];
+
+	if (priority == MIPS_EXC_INT_TIMER) {
+		set_gc0_cause(C_TI);
+	} else if (priority == MIPS_EXC_INT_IO ||
+		   priority == MIPS_EXC_INT_IPI_1 ||
+		   priority == MIPS_EXC_INT_IPI_2) {
+		if (cpu_has_guestctl2)
+			set_c0_guestctl2(irq);
+		else
+			set_gc0_cause(irq);
+	}
+
+	clear_bit(priority, &vcpu->arch.pending_exceptions);
+	return 1;
+}
+
+/*
+ * Clear interrupt @priority in the guest.
+ *
+ * Timer, I/O and IPI interrupts are all handled the same way. With GuestCtl2
+ * the irq bit is cleared there unless the matching bit 14 positions higher is
+ * set (Hardware Clear in use); without GuestCtl2 the irq bit is cleared in the
+ * guest Cause register. Other priorities touch no register. The pending-clear
+ * bit for @priority is cleared in every case. @cause is unused.
+ *
+ * Returns: always 1.
+ */
+static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
+			       u32 cause)
+{
+	u32 irq = 0;
+
+	if (priority < MIPS_EXC_MAX)
+		irq = kvm_vz_priority_to_irq[priority];
+
+	switch (priority) {
+	case MIPS_EXC_INT_TIMER:
+		/*
+		 * Call to kvm_write_c0_guest_compare() clears Cause.TI in
+		 * kvm_mips_emulate_CP0(). Explicitly clear irq associated with
+		 * Cause.IP[IPTI] if GuestCtl2 virtual interrupt register not
+		 * supported or if not using GuestCtl2 Hardware Clear.
+		 */
+	case MIPS_EXC_INT_IO:
+	case MIPS_EXC_INT_IPI_1:
+	case MIPS_EXC_INT_IPI_2:
+		/* Clear GuestCtl2.VIP irq if not using Hardware Clear */
+		if (!cpu_has_guestctl2)
+			clear_gc0_cause(irq);
+		else if (!(read_c0_guestctl2() & (irq << 14)))
+			clear_c0_guestctl2(irq);
+		break;
+
+	default:
+		break;
+	}
+
+	clear_bit(priority, &vcpu->arch.pending_exceptions_clr);
+	return 1;
+}
+
+/*
+ * VZ guest timer handling.
+ */
+
+/**
+ * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer.
+ * @vcpu:	Virtual CPU.
+ *
+ * The hard timer is only usable when the guest count is not disabled, the
+ * guest's chosen count frequency equals the real one, and CP0_GTOffset is a
+ * full 32 bits wide (narrower offsets than CP0_Count are not supported).
+ *
+ * Returns:	true if the VZ GTOffset & real guest CP0_Count should be used
+ *		instead of software emulation of guest timer.
+ *		false otherwise.
+ */
+static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu)
+{
+	return !kvm_mips_count_disabled(vcpu) &&
+	       mips_hpt_frequency == vcpu->arch.count_hz &&
+	       current_cpu_data.gtoffset_mask == 0xffffffff;
+}
+
+/**
+ * _kvm_vz_restore_stimer() - Restore soft timer state.
+ * @vcpu: Virtual CPU.
+ * @compare: CP0_Compare register value, restored by caller.
+ * @cause: CP0_Cause register to restore.
+ *
+ * Restore VZ state relating to the soft timer. The hard timer can be enabled
+ * later.
+ */
+static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare,
+ u32 cause)
+{
+ /*
+ * Avoid spurious counter interrupts by setting Guest CP0_Count to just
+ * after Guest CP0_Compare.
+ */
+ /*
+ * NOTE(review): presumably guest Count is root Count plus GTOffset, so
+ * this offset makes guest Count equal @compare at this instant - confirm.
+ */
+ write_c0_gtoffset(compare - read_c0_count());
+
+ /* Cause is only written after the new offset and a CP0 hazard barrier */
+ back_to_back_c0_hazard();
+ write_gc0_cause(cause);
+}
+
+/**
+ * _kvm_vz_restore_htimer() - Restore hard timer state.
+ * @vcpu: Virtual CPU.
+ * @compare: CP0_Compare register value, restored by caller.
+ * @cause: CP0_Cause register to restore.
+ *
+ * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the
+ * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause.
+ */
+static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
+ u32 compare, u32 cause)
+{
+ u32 start_count, after_count;
+ ktime_t freeze_time;
+ unsigned long flags;
+
+ /*
+ * Freeze the soft-timer and sync the guest CP0_Count with it. We do
+ * this with interrupts disabled to avoid latency.
+ */
+ local_irq_save(flags);
+ freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count);
+ write_c0_gtoffset(start_count - read_c0_count());
+ local_irq_restore(flags);
+
+ /* restore guest CP0_Cause, as TI may already be set */
+ back_to_back_c0_hazard();
+ write_gc0_cause(cause);
+
+ /*
+ * The above sequence isn't atomic and would result in lost timer
+ * interrupts if we're not careful. Detect if a timer interrupt is due
+ * and assert it.
+ */
+ back_to_back_c0_hazard();
+ after_count = read_gc0_count();
+ /*
+ * Modulo-2^32 test: true exactly when @compare lies in the half-open
+ * interval (start_count, after_count]. compare == start_count makes the
+ * right-hand side wrap to 0xffffffff, so it never matches.
+ */
+ if (after_count - start_count > compare - start_count - 1)
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
+/**
+ * kvm_vz_restore_timer() - Restore timer state.
+ * @vcpu:	Virtual CPU.
+ *
+ * Load the saved Compare and Cause values from the VCPU context, write Compare
+ * to the guest and restore the soft timer state from them.
+ */
+static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	u32 saved_compare = kvm_read_sw_gc0_compare(cop0);
+	u32 saved_cause = kvm_read_sw_gc0_cause(cop0);
+
+	write_gc0_compare(saved_compare);
+	_kvm_vz_restore_stimer(vcpu, saved_compare, saved_cause);
+}
+
+/**
+ * kvm_vz_acquire_htimer() - Switch to hard timer state.
+ * @vcpu:	Virtual CPU.
+ *
+ * Restore hard timer state on top of existing soft timer state if possible.
+ * Nothing is done if GuestCtl0.GT is already set or if the hard timer should
+ * not be used for this VCPU.
+ *
+ * Since hard timer won't remain active over preemption, preemption should be
+ * disabled by the caller.
+ */
+void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu)
+{
+	u32 guestctl0 = read_c0_guestctl0();
+
+	/* hard timer already handed to the guest */
+	if (guestctl0 & MIPS_GCTL0_GT)
+		return;
+
+	if (!kvm_vz_should_use_htimer(vcpu))
+		return;
+
+	/* enable guest access to hard timer */
+	write_c0_guestctl0(guestctl0 | MIPS_GCTL0_GT);
+
+	_kvm_vz_restore_htimer(vcpu, read_gc0_compare(),
+			       read_gc0_cause());
+}
+
+/**
+ * _kvm_vz_save_htimer() - Switch to software emulation of guest timer.
+ * @vcpu: Virtual CPU.
+ * @out_compare: Pointer to write compare value to.
+ * @out_cause: Pointer to write cause value to.
+ *
+ * Save VZ guest timer state and switch to software emulation of guest CP0
+ * timer. The hard timer must already be in use, so preemption should be
+ * disabled.
+ */
+static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu,
+ u32 *out_compare, u32 *out_cause)
+{
+ u32 cause, compare, before_count, end_count;
+ ktime_t before_time;
+
+ compare = read_gc0_compare();
+ *out_compare = compare;
+
+ before_time = ktime_get();
+
+ /*
+ * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time
+ * at which no pending timer interrupt is missing.
+ */
+ before_count = read_gc0_count();
+ back_to_back_c0_hazard();
+ cause = read_gc0_cause();
+ *out_cause = cause;
+
+ /*
+ * Record a final CP0_Count which we will transfer to the soft-timer.
+ * This is recorded *after* saving CP0_Cause, so we don't get any timer
+ * interrupts from just after the final CP0_Count point.
+ */
+ back_to_back_c0_hazard();
+ end_count = read_gc0_count();
+
+ /*
+ * The above sequence isn't atomic, so we could miss a timer interrupt
+ * between reading CP0_Cause and end_count. Detect and record any timer
+ * interrupt due between before_count and end_count.
+ */
+ /*
+ * Modulo-2^32 test: true exactly when compare lies in the half-open
+ * interval (before_count, end_count].
+ */
+ if (end_count - before_count > compare - before_count - 1)
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+
+ /*
+ * Restore soft-timer, ignoring a small amount of negative drift due to
+ * delay between freeze_hrtimer and setting CP0_GTOffset.
+ */
+ kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000);
+}
+
+/**
+ * kvm_vz_save_timer() - Save guest timer state.
+ * @vcpu:	Virtual CPU.
+ *
+ * Store the guest Compare and Cause values into the VCPU context. If
+ * GuestCtl0.GT is set, guest use of the hard timer is disabled first and its
+ * state is transferred to the soft guest timer.
+ */
+static void kvm_vz_save_timer(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	u32 guestctl0 = read_c0_guestctl0();
+	u32 saved_compare, saved_cause;
+
+	if (!(guestctl0 & MIPS_GCTL0_GT)) {
+		/* hard timer not in use, read the registers directly */
+		saved_compare = read_gc0_compare();
+		saved_cause = read_gc0_cause();
+	} else {
+		/* disable guest use of hard timer */
+		write_c0_guestctl0(guestctl0 & ~MIPS_GCTL0_GT);
+
+		/* save hard timer state */
+		_kvm_vz_save_htimer(vcpu, &saved_compare, &saved_cause);
+	}
+
+	/* save timer-related state to VCPU context */
+	kvm_write_sw_gc0_cause(cop0, saved_cause);
+	kvm_write_sw_gc0_compare(cop0, saved_compare);
+}
+
+/**
+ * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use.
+ * @vcpu:	Virtual CPU.
+ *
+ * Transfers the state of the hard guest timer to the soft guest timer, leaving
+ * guest state intact so it can continue to be used with the soft timer. Does
+ * nothing if GuestCtl0.GT is already clear. Runs with preemption disabled.
+ */
+void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu)
+{
+	u32 guestctl0, saved_compare, saved_cause;
+
+	preempt_disable();
+
+	guestctl0 = read_c0_guestctl0();
+	if (!(guestctl0 & MIPS_GCTL0_GT))
+		goto out;
+
+	/* disable guest use of timer */
+	write_c0_guestctl0(guestctl0 & ~MIPS_GCTL0_GT);
+
+	/* switch to soft timer */
+	_kvm_vz_save_htimer(vcpu, &saved_compare, &saved_cause);
+
+	/* leave soft timer in usable state */
+	_kvm_vz_restore_stimer(vcpu, saved_compare, saved_cause);
+
+out:
+	preempt_enable();
+}
+
+/**
+ * is_eva_access() - Find whether an instruction is an EVA memory accessor.
+ * @inst:	32-bit instruction encoding.
+ *
+ * An EVA accessor is a SPECIAL3 instruction whose function field is one of the
+ * EVA load, store, cache or prefetch operations. Emulation of such an
+ * instruction should access the user mode address space instead of the kernel
+ * mode address space, which matters for MUSUK segments that are TLB mapped for
+ * user mode but unmapped for kernel mode.
+ *
+ * Returns:	Whether @inst encodes an EVA accessor instruction.
+ */
+static bool is_eva_access(union mips_instruction inst)
+{
+	bool eva = false;
+
+	if (inst.spec3_format.opcode == spec3_op) {
+		switch (inst.spec3_format.func) {
+		case lwle_op:
+		case lwre_op:
+		case cachee_op:
+		case sbe_op:
+		case she_op:
+		case sce_op:
+		case swe_op:
+		case swle_op:
+		case swre_op:
+		case prefe_op:
+		case lbue_op:
+		case lhue_op:
+		case lbe_op:
+		case lhe_op:
+		case lle_op:
+		case lwe_op:
+			eva = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return eva;
+}
+
+/**
+ * is_eva_am_mapped() - Find whether an access mode is mapped.
+ * @vcpu:	KVM VCPU state.
+ * @am:		3-bit encoded access mode.
+ * @eu:		Segment becomes unmapped and uncached when Status.ERL=1.
+ *
+ * Decode @am to find whether it encodes a mapped segment for the current VCPU
+ * state. Where necessary @eu and the actual instruction causing the fault are
+ * taken into account to make the decision.
+ *
+ * Returns:	Whether the VCPU faulted on a TLB mapped address.
+ */
+static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu)
+{
+	u32 am_lookup;
+
+	/*
+	 * Interpret access control mode. We assume address errors will already
+	 * have been caught by the guest, leaving us with:
+	 *       AM  UM  SM  KM  31..24 23..16
+	 * UK    0 000          Unm   0      0
+	 * MK    1 001          TLB   1
+	 * MSK   2 010      TLB TLB   1
+	 * MUSK  3 011  TLB TLB TLB   1
+	 * MUSUK 4 100  TLB TLB Unm   0      1
+	 * USK   5 101      Unm Unm   0      0
+	 * -     6 110                0      0
+	 * UUSK  7 111  Unm Unm Unm   0      0
+	 *
+	 * We shift a magic value by AM across the sign bit to find if always
+	 * TLB mapped, and if not shift by 8 again to find if it depends on KM.
+	 *
+	 * The shift is done on an unsigned 32-bit value: the bare constant
+	 * has type int, and shifting set bits of a signed int into or past
+	 * the sign bit is undefined behaviour in ISO C.
+	 */
+	am_lookup = (u32)0x70080000 << am;
+	if ((s32)am_lookup < 0) {
+		/*
+		 * MK, MSK, MUSK
+		 * Always TLB mapped, unless SegCtl.EU && ERL
+		 */
+		if (!eu || !(read_gc0_status() & ST0_ERL))
+			return true;
+	} else {
+		am_lookup <<= 8;
+		if ((s32)am_lookup < 0) {
+			union mips_instruction inst;
+			unsigned int status;
+			u32 *opc;
+			int err;
+
+			/*
+			 * MUSUK
+			 * TLB mapped if not in kernel mode
+			 */
+			status = read_gc0_status();
+			if (!(status & (ST0_EXL | ST0_ERL)) &&
+			    (status & ST0_KSU))
+				return true;
+			/*
+			 * EVA access instructions in kernel
+			 * mode access user address space.
+			 */
+			opc = (u32 *)vcpu->arch.pc;
+			/* step past the branch if the fault was in its delay slot */
+			if (vcpu->arch.host_cp0_cause & CAUSEF_BD)
+				opc += 1;
+			err = kvm_get_badinstr(opc, vcpu, &inst.word);
+			if (!err && is_eva_access(inst))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA.
+ * @vcpu: KVM VCPU state.
+ * @gva: Guest virtual address to convert.
+ * @gpa: Output guest physical address.
+ *
+ * Convert a guest virtual address (GVA) which is valid according to the guest
+ * context, to a guest physical address (GPA).
+ *
+ * Returns: 0 on success.
+ * -errno on failure.
+ */
+static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa)
+{
+ u32 gva32 = gva;
+ unsigned long segctl;
+
+ /* True when gva equals the sign extension of its own low 32 bits */
+ if ((long)gva == (s32)gva32) {
+ /* Handle canonical 32-bit virtual address */
+ if (cpu_guest_has_segments) {
+ unsigned long mask, pa;
+
+ /*
+ * Pick the segment configuration by the top 3 address bits.
+ * Each SegCtl register holds two configurations; the odd
+ * numbered one is in the upper 16 bits.
+ */
+ switch (gva32 >> 29) {
+ case 0:
+ case 1: /* CFG5 (1GB) */
+ segctl = read_gc0_segctl2() >> 16;
+ mask = (unsigned long)0xfc0000000ull;
+ break;
+ case 2:
+ case 3: /* CFG4 (1GB) */
+ segctl = read_gc0_segctl2();
+ mask = (unsigned long)0xfc0000000ull;
+ break;
+ case 4: /* CFG3 (512MB) */
+ segctl = read_gc0_segctl1() >> 16;
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ case 5: /* CFG2 (512MB) */
+ segctl = read_gc0_segctl1();
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ case 6: /* CFG1 (512MB) */
+ segctl = read_gc0_segctl0() >> 16;
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ case 7: /* CFG0 (512MB) */
+ segctl = read_gc0_segctl0();
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ default:
+ /*
+ * GCC 4.9 isn't smart enough to figure out that
+ * segctl and mask are always initialised.
+ */
+ unreachable();
+ }
+
+ /* Bits 6:4 of the configuration are passed as AM, bit 3 as EU */
+ if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7,
+ segctl & 0x0008))
+ goto tlb_mapped;
+
+ /* Unmapped, find guest physical address */
+ /*
+ * Address bits covered by mask come from the segment
+ * configuration, the remaining low bits from the virtual
+ * address.
+ */
+ pa = (segctl << 20) & mask;
+ pa |= gva32 & ~mask;
+ *gpa = pa;
+ return 0;
+ } else if ((s32)gva32 < (s32)0xc0000000) {
+ /* legacy unmapped KSeg0 or KSeg1 */
+ *gpa = gva32 & 0x1fffffff;
+ return 0;
+ }
+#ifdef CONFIG_64BIT
+ } else if ((gva & 0xc000000000000000) == 0x8000000000000000) {
+ /* XKPHYS */
+ if (cpu_guest_has_segments) {
+ /*
+ * Each of the 8 regions can be overridden by SegCtl2.XR
+ * to use SegCtl1.XAM.
+ */
+ segctl = read_gc0_segctl2();
+ if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) {
+ segctl = read_gc0_segctl1();
+ if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7,
+ 0))
+ goto tlb_mapped;
+ }
+
+ }
+ /*
+ * Traditionally fully unmapped.
+ * Bits 61:59 specify the CCA, which we can just mask off here.
+ * Bits 58:PABITS should be zero, but we shouldn't have got here
+ * if it wasn't.
+ */
+ *gpa = gva & 0x07ffffffffffffff;
+ return 0;
+#endif
+ }
+
+ /*
+ * Reached by explicit goto for TLB mapped segments, and by fall-through
+ * for every address that none of the branches above resolved.
+ */
+tlb_mapped:
+ return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa);
+}
+
+/**
+ * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA.
+ * @vcpu:	KVM VCPU state.
+ * @badvaddr:	Root BadVAddr.
+ * @gpa:	Output guest physical address.
+ *
+ * VZ implementations are permitted to report guest virtual addresses (GVA) in
+ * BadVAddr on a root exception during guest execution, instead of the more
+ * convenient guest physical addresses (GPA). The GuestCtl0.GExcCode value
+ * saved at exit tells which kind was reported; a GVA is translated in
+ * software, taking into account guest segmentation and guest TLB state.
+ *
+ * Returns:	0 on success.
+ *		-EINVAL if the exception code is neither GPA nor GVA.
+ *		Otherwise whatever kvm_vz_gva_to_gpa() returns.
+ */
+static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr,
+				  unsigned long *gpa)
+{
+	unsigned int gexccode;
+
+	gexccode = vcpu->arch.host_cp0_guestctl0 & MIPS_GCTL0_GEXC;
+	gexccode >>= MIPS_GCTL0_GEXC_SHIFT;
+
+	if (unlikely(gexccode != MIPS_GCTL0_GEXC_GPA)) {
+		/* Not a GPA, so we'd expect it to be GVA ... */
+		if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA,
+			 "Unexpected gexccode %#x\n", gexccode))
+			return -EINVAL;
+
+		/* ... which needs the GVA->GPA translation in software */
+		return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa);
+	}
+
+	/* BadVAddr is already a GPA, pass it through */
+	*gpa = badvaddr;
+	return 0;
+}
+
+/*
+ * Fallback for guest exits that have no dedicated handler.
+ *
+ * Logs the exception code, faulting PC, instruction, BadVAddr and guest
+ * Status, dumps the VCPU registers and reports an internal error to userspace.
+ * If fetching the instruction fails it is logged as 0.
+ *
+ * Returns: RESUME_HOST.
+ */
+static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	u32 *fault_pc = (u32 *)arch->pc;
+	u32 host_cause = arch->host_cp0_cause;
+	u32 exccode = (host_cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
+	unsigned long badvaddr = arch->host_cp0_badvaddr;
+	u32 badinstr = 0;
+
+	/* The faulting instruction follows the branch when in a delay slot */
+	if (host_cause & CAUSEF_BD)
+		fault_pc++;
+	kvm_get_badinstr(fault_pc, vcpu, &badinstr);
+
+	kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
+		exccode, fault_pc, badinstr, badvaddr,
+		read_gc0_status());
+	kvm_arch_vcpu_dump_regs(vcpu);
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+
+	return RESUME_HOST;
+}
+
+/*
+ * Sanitise a value the guest writes to MAAR.
+ *
+ * @op is the COP0 move opcode used for the write. For mtc_op VH is cleared;
+ * for dmtc_op VH is made to follow VL; any other opcode leaves VH as given.
+ * The result is then restricted to the implemented bits: the low address
+ * field, S and VL, plus the extended address bits when PageGrain.ELPA is set
+ * and VH when cpu_guest_has_mvh.
+ *
+ * Returns: the masked value.
+ */
+static unsigned long mips_process_maar(unsigned int op, unsigned long val)
+{
+	/* Bits that are kept; everything else is masked off */
+	unsigned long keep = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL;
+
+	if (read_gc0_pagegrain() & PG_ELPA)
+		keep |= 0x00ffffff00000000ull;
+	if (cpu_guest_has_mvh)
+		keep |= MIPS_MAAR_VH;
+
+	if (op == mtc_op || op == dmtc_op) {
+		/* both forms start from VH clear */
+		val &= ~MIPS_MAAR_VH;
+
+		/* a 64-bit write sets VH to match VL */
+		if (op == dmtc_op && (val & MIPS_MAAR_VL))
+			val |= MIPS_MAAR_VH;
+	}
+
+	return val & keep;
+}
+
+/*
+ * Handle a guest write of @val to MAARI.
+ *
+ * Only the index field is considered. An all-ones index selects the last
+ * entry of vcpu->arch.maar, an in-range index is stored as is, and any other
+ * index leaves the stored MAARI unchanged.
+ */
+static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned long index = val & MIPS_MAARI_INDEX;
+
+	if (index == MIPS_MAARI_INDEX)
+		index = ARRAY_SIZE(vcpu->arch.maar) - 1;
+	else if (index >= ARRAY_SIZE(vcpu->arch.maar))
+		return;
+
+	kvm_write_sw_gc0_maari(cop0, index);
+}
+
+/*
+ * Emulate a guest COP0 instruction that trapped with a GPSI exception.
+ *
+ * @inst is the trapping instruction and @cause the host Cause value used to
+ * advance the PC. @opc and @run are not used here. Handles WAIT, and
+ * MFC0/DMFC0/MTC0/DMTC0 for the specific registers listed below; anything else
+ * fails. The guest PC is advanced up front and rolled back if emulation fails.
+ *
+ * Returns: EMULATE_FAIL for an unsupported instruction or register, otherwise
+ * the result of the emulation step.
+ */
+static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
+ u32 *opc, u32 cause,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ enum emulation_result er = EMULATE_DONE;
+ u32 rt, rd, sel;
+ unsigned long curr_pc;
+ unsigned long val;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ er = update_pc(vcpu, cause);
+ if (er == EMULATE_FAIL)
+ return er;
+
+ if (inst.co_format.co) {
+ switch (inst.co_format.func) {
+ case wait_op:
+ er = kvm_mips_emul_wait(vcpu);
+ break;
+ default:
+ er = EMULATE_FAIL;
+ }
+ } else {
+ rt = inst.c0r_format.rt;
+ rd = inst.c0r_format.rd;
+ sel = inst.c0r_format.sel;
+
+ switch (inst.c0r_format.rs) {
+ case dmfc_op:
+ case mfc_op:
+#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
+ cop0->stat[rd][sel]++;
+#endif
+ if (rd == MIPS_CP0_COUNT &&
+ sel == 0) { /* Count */
+ val = kvm_mips_read_count(vcpu);
+ } else if (rd == MIPS_CP0_COMPARE &&
+ sel == 0) { /* Compare */
+ val = read_gc0_compare();
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 0) { /* LLAddr */
+ /* Only the LLB bit is exposed; reads as 0 without it */
+ if (cpu_guest_has_rw_llb)
+ val = read_gc0_lladdr() &
+ MIPS_LLADDR_LLB;
+ else
+ val = 0;
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 1 && /* MAAR */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) {
+ /* MAARI must be in range */
+ BUG_ON(kvm_read_sw_gc0_maari(cop0) >=
+ ARRAY_SIZE(vcpu->arch.maar));
+ val = vcpu->arch.maar[
+ kvm_read_sw_gc0_maari(cop0)];
+ } else if ((rd == MIPS_CP0_PRID &&
+ (sel == 0 || /* PRid */
+ sel == 2 || /* CDMMBase */
+ sel == 3)) || /* CMGCRBase */
+ (rd == MIPS_CP0_STATUS &&
+ (sel == 2 || /* SRSCtl */
+ sel == 3)) || /* SRSMap */
+ (rd == MIPS_CP0_CONFIG &&
+ (sel == 7)) || /* Config7 */
+ (rd == MIPS_CP0_LLADDR &&
+ (sel == 2) && /* MAARI */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) ||
+ (rd == MIPS_CP0_ERRCTL &&
+ (sel == 0))) { /* ErrCtl */
+ /* These are served from the software register copy */
+ val = cop0->reg[rd][sel];
+ } else {
+ val = 0;
+ er = EMULATE_FAIL;
+ }
+
+ if (er != EMULATE_FAIL) {
+ /* Sign extend */
+ if (inst.c0r_format.rs == mfc_op)
+ val = (int)val;
+ vcpu->arch.gprs[rt] = val;
+ }
+
+ trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ?
+ KVM_TRACE_MFC0 : KVM_TRACE_DMFC0,
+ KVM_TRACE_COP0(rd, sel), val);
+ break;
+
+ case dmtc_op:
+ case mtc_op:
+#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
+ cop0->stat[rd][sel]++;
+#endif
+ val = vcpu->arch.gprs[rt];
+ trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ?
+ KVM_TRACE_MTC0 : KVM_TRACE_DMTC0,
+ KVM_TRACE_COP0(rd, sel), val);
+
+ if (rd == MIPS_CP0_COUNT &&
+ sel == 0) { /* Count */
+ /* Drop to the soft timer before changing Count */
+ kvm_vz_lose_htimer(vcpu);
+ kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
+ } else if (rd == MIPS_CP0_COMPARE &&
+ sel == 0) { /* Compare */
+ kvm_mips_write_compare(vcpu,
+ vcpu->arch.gprs[rt],
+ true);
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 0) { /* LLAddr */
+ /*
+ * P5600 generates GPSI on guest MTC0 LLAddr.
+ * Only allow the guest to clear LLB.
+ */
+ if (cpu_guest_has_rw_llb &&
+ !(val & MIPS_LLADDR_LLB))
+ write_gc0_lladdr(0);
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 1 && /* MAAR */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) {
+ val = mips_process_maar(inst.c0r_format.rs,
+ val);
+
+ /* MAARI must be in range */
+ BUG_ON(kvm_read_sw_gc0_maari(cop0) >=
+ ARRAY_SIZE(vcpu->arch.maar));
+ vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] =
+ val;
+ } else if (rd == MIPS_CP0_LLADDR &&
+ (sel == 2) && /* MAARI */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) {
+ kvm_write_maari(vcpu, val);
+ } else if (rd == MIPS_CP0_ERRCTL &&
+ (sel == 0)) { /* ErrCtl */
+ /* ignore the written value */
+ } else {
+ er = EMULATE_FAIL;
+ }
+ break;
+
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+ }
+ /* Rollback PC only if emulation was unsuccessful */
+ if (er == EMULATE_FAIL) {
+ kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n",
+ curr_pc, __func__, inst.word);
+
+ vcpu->arch.pc = curr_pc;
+ }
+
+ return er;
+}
+
+/*
+ * Emulate a guest CACHE instruction that trapped with a GPSI exception.
+ *
+ * @inst is the trapping instruction and @cause the host Cause value used to
+ * advance the PC. @opc and @run are not used here. Operations on caches other
+ * than the primary I and D caches are ignored. Indexed invalidate/writeback
+ * ops are carried out on the host; hit ops are only emulated on Octeon III.
+ * The guest PC is advanced up front and rolled back on failure.
+ *
+ * Returns: EMULATE_DONE if the op was handled or ignored, EMULATE_FAIL
+ * otherwise.
+ */
+static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
+ u32 *opc, u32 cause,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er = EMULATE_DONE;
+ u32 cache, op_inst, op, base;
+ s16 offset;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ unsigned long va, curr_pc;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ er = update_pc(vcpu, cause);
+ if (er == EMULATE_FAIL)
+ return er;
+
+ base = inst.i_format.rs;
+ op_inst = inst.i_format.rt;
+ /* The offset is taken from a different instruction format on R6 */
+ if (cpu_has_mips_r6)
+ offset = inst.spec3_format.simmediate;
+ else
+ offset = inst.i_format.simmediate;
+ cache = op_inst & CacheOp_Cache;
+ op = op_inst & CacheOp_Op;
+
+ va = arch->gprs[base] + offset;
+
+ kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
+ cache, op, base, arch->gprs[base], offset);
+
+ /* Secondary or tertiary cache ops ignored */
+ if (cache != Cache_I && cache != Cache_D)
+ return EMULATE_DONE;
+
+ switch (op_inst) {
+ case Index_Invalidate_I:
+ flush_icache_line_indexed(va);
+ return EMULATE_DONE;
+ case Index_Writeback_Inv_D:
+ flush_dcache_line_indexed(va);
+ return EMULATE_DONE;
+ case Hit_Invalidate_I:
+ case Hit_Invalidate_D:
+ case Hit_Writeback_Inv_D:
+ if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) {
+ /* We can just flush entire icache */
+ local_flush_icache_range(0, 0);
+ return EMULATE_DONE;
+ }
+
+ /* So far, other platforms support guest hit cache ops */
+ break;
+ default:
+ break;
+ };
+
+ /* Everything that reaches this point is reported as unsupported */
+ kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
+ curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base],
+ offset);
+ /* Rollback PC */
+ vcpu->arch.pc = curr_pc;
+
+ return EMULATE_FAIL;
+}
+
+/*
+ * Handle a GPSI exit by decoding and emulating the trapping instruction.
+ *
+ * @cause is the host Cause value and @opc the guest PC of the exit; the
+ * instruction is fetched from the following word when the BD bit is set.
+ * COP0 and CACHE instructions are passed to their emulation helpers, and
+ * RDHWR of the count register is emulated inline. Everything else is logged
+ * and fails.
+ *
+ * Returns: EMULATE_FAIL if the instruction cannot be fetched or is not
+ * supported, otherwise the result of the emulation.
+ */
+static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ struct kvm_run *run = vcpu->run;
+ union mips_instruction inst;
+ int rd, rt, sel;
+ int err;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err)
+ return EMULATE_FAIL;
+
+ switch (inst.r_format.opcode) {
+ case cop0_op:
+ er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu);
+ break;
+#ifndef CONFIG_CPU_MIPSR6
+ case cache_op:
+ trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+ er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
+ break;
+#endif
+ case spec3_op:
+ switch (inst.spec3_format.func) {
+#ifdef CONFIG_CPU_MIPSR6
+ case cache6_op:
+ trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+ er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
+ break;
+#endif
+ case rdhwr_op:
+ /* Reject encodings with rs set or the upper bits of re set */
+ if (inst.r_format.rs || (inst.r_format.re >> 3))
+ goto unknown;
+
+ rd = inst.r_format.rd;
+ rt = inst.r_format.rt;
+ sel = inst.r_format.re & 0x7;
+
+ switch (rd) {
+ case MIPS_HWR_CC: /* Read count register */
+ arch->gprs[rt] =
+ (long)(int)kvm_mips_read_count(vcpu);
+ break;
+ default:
+ trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
+ KVM_TRACE_HWR(rd, sel), 0);
+ goto unknown;
+ };
+
+ trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
+ KVM_TRACE_HWR(rd, sel), arch->gprs[rt]);
+
+ er = update_pc(vcpu, cause);
+ break;
+ default:
+ goto unknown;
+ };
+ break;
+ /*
+ * The label sits inside the outer switch directly before default, so a
+ * goto from the nested spec3 switch lands in the default error path.
+ */
+unknown:
+
+ default:
+ kvm_err("GPSI exception not supported (%p/%#x)\n",
+ opc, inst.word);
+ kvm_arch_vcpu_dump_regs(vcpu);
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ return er;
+}
+
+/**
+ * kvm_trap_vz_handle_gsfc() - Emulate a guest MTC0 that caused a GSFC exit.
+ * @cause: Saved host CP0 Cause value; CAUSEF_BD selects the instruction after
+ * @opc, and the value is also passed on to update_pc().
+ * @opc: Guest PC of the exit (the caller passes vcpu->arch.pc).
+ * @vcpu: Virtual CPU context.
+ *
+ * Only MTC0 to guest Status, Cause, IntCtl (Status sel 1) and Config5 is
+ * handled. The value is taken from the guest GPR, adjusted where the
+ * per-register code below requires it, and written to the guest CP0 register,
+ * after which the guest PC is advanced with update_pc().
+ *
+ * Returns: EMULATE_FAIL if the instruction cannot be fetched, is not an MTC0,
+ * or targets any other register; otherwise the result of update_pc().
+ */
+static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ union mips_instruction inst;
+ int err;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err)
+ return EMULATE_FAIL;
+
+ /* complete MTC0 on behalf of guest and advance EPC */
+ if (inst.c0r_format.opcode == cop0_op &&
+ inst.c0r_format.rs == mtc_op &&
+ inst.c0r_format.z == 0) {
+ int rt = inst.c0r_format.rt;
+ int rd = inst.c0r_format.rd;
+ int sel = inst.c0r_format.sel;
+ unsigned int val = arch->gprs[rt];
+ unsigned int old_val, change;
+
+ trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel),
+ val);
+
+ if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
+ /* FR bit should read as zero if no FPU */
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ val &= ~(ST0_CU1 | ST0_FR);
+
+ /*
+ * Also don't allow FR to be set if host doesn't support
+ * it.
+ */
+ if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64))
+ val &= ~ST0_FR;
+
+ old_val = read_gc0_status();
+ change = val ^ old_val;
+
+ if (change & ST0_FR) {
+ /*
+ * FPU and Vector register state is made
+ * UNPREDICTABLE by a change of FR, so don't
+ * even bother saving it.
+ */
+ kvm_drop_fpu(vcpu);
+ }
+
+ /*
+ * If MSA state is already live, it is undefined how it
+ * interacts with FR=0 FPU state, and we don't want to
+ * hit reserved instruction exceptions trying to save
+ * the MSA state later when CU=1 && FR=1, so play it
+ * safe and save it first.
+ */
+ if (change & ST0_CU1 && !(val & ST0_FR) &&
+ vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)
+ kvm_lose_fpu(vcpu);
+
+ write_gc0_status(val);
+ } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
+ /* These locals shadow the outer old_val/change pair */
+ u32 old_cause = read_gc0_cause();
+ u32 change = old_cause ^ val;
+
+ /* DC bit enabling/disabling timer? */
+ if (change & CAUSEF_DC) {
+ if (val & CAUSEF_DC) {
+ kvm_vz_lose_htimer(vcpu);
+ kvm_mips_count_disable_cause(vcpu);
+ } else {
+ kvm_mips_count_enable_cause(vcpu);
+ }
+ }
+
+ /* Only certain bits are RW to the guest */
+ change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP |
+ CAUSEF_IP0 | CAUSEF_IP1);
+
+ /* WP can only be cleared */
+ change &= ~CAUSEF_WP | old_cause;
+
+ /* Flip only the bits that survived the filtering above */
+ write_gc0_cause(old_cause ^ change);
+ } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */
+ write_gc0_intctl(val);
+ } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+ old_val = read_gc0_config5();
+ change = val ^ old_val;
+ /* Handle changes in FPU/MSA modes */
+ preempt_disable();
+
+ /*
+ * Propagate FRE changes immediately if the FPU
+ * context is already loaded.
+ */
+ if (change & MIPS_CONF5_FRE &&
+ vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)
+ change_c0_config5(MIPS_CONF5_FRE, val);
+
+ preempt_enable();
+
+ /* Apply only the changes the guest write mask permits */
+ val = old_val ^
+ (change & kvm_vz_config5_guest_wrmask(vcpu));
+ write_gc0_config5(val);
+ } else {
+ kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n",
+ opc, inst.word);
+ er = EMULATE_FAIL;
+ }
+
+ if (er != EMULATE_FAIL)
+ er = update_pc(vcpu, cause);
+ } else {
+ kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n",
+ opc, inst.word);
+ er = EMULATE_FAIL;
+ }
+
+ return er;
+}
+
+/**
+ * kvm_trap_vz_handle_ghfc() - Handle a Guest Hardware Field Change exit.
+ * @cause:	Saved host CP0 Cause value (unused).
+ * @opc:	Guest PC of the exit (unused).
+ * @vcpu:	Virtual CPU context.
+ *
+ * Nothing is emulated; the exit is only traced.
+ *
+ * Returns:	EMULATE_DONE, always.
+ */
+static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc,
+						     struct kvm_vcpu *vcpu)
+{
+	const enum emulation_result er = EMULATE_DONE;
+
+	/* Most likely a guest mode change (MC), so just record it */
+	trace_kvm_guest_mode_change(vcpu);
+	return er;
+}
+
+/**
+ * kvm_trap_vz_handle_hc() - Handle a hypercall guest exit.
+ * @cause:	Saved host CP0 Cause; CAUSEF_BD selects the instruction after @opc.
+ * @opc:	Guest PC of the exit.
+ * @vcpu:	Virtual CPU context.
+ *
+ * The guest PC is advanced before the hypercall is emulated and put back if
+ * the emulation fails.
+ *
+ * Returns:	EMULATE_FAIL if the instruction cannot be fetched or the PC cannot
+ *		be advanced, otherwise the result of kvm_mips_emul_hypcall().
+ */
+static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc,
+						   struct kvm_vcpu *vcpu)
+{
+	union mips_instruction hypcall;
+	enum emulation_result res;
+	unsigned long saved_pc;
+
+	/* With Cause.BD set the instruction of interest follows @opc */
+	if (cause & CAUSEF_BD)
+		opc++;
+
+	if (kvm_get_badinstr(opc, vcpu, &hypcall.word))
+		return EMULATE_FAIL;
+
+	/* Remember where we were so a failed hypercall can be rolled back */
+	saved_pc = vcpu->arch.pc;
+
+	res = update_pc(vcpu, cause);
+	if (res == EMULATE_FAIL)
+		return EMULATE_FAIL;
+
+	res = kvm_mips_emul_hypcall(vcpu, hypcall);
+	if (res == EMULATE_FAIL)
+		vcpu->arch.pc = saved_pc;
+
+	return res;
+}
+
+/**
+ * kvm_trap_vz_no_handler_guest_exit() - Report a guest exit with no handler.
+ * @gexccode:	Guest exception code, printed in the error message.
+ * @cause:	Saved host CP0 Cause; CAUSEF_BD selects the instruction after @opc.
+ * @opc:	Guest PC of the exit.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Logs the exception code, PC, instruction word and guest Status.
+ *
+ * Returns:	EMULATE_FAIL, always.
+ */
+static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode,
+								u32 cause,
+								u32 *opc,
+								struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Pre-zeroed so that a failed fetch logs 0 instead of whatever happened
+	 * to be on the stack; the fetch result only feeds the message below.
+	 */
+	u32 inst = 0;
+
+	/*
+	 * Fetch the instruction.
+	 */
+	if (cause & CAUSEF_BD)
+		opc += 1;
+	kvm_get_badinstr(opc, vcpu, &inst);
+
+	kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n",
+		gexccode, opc, inst, read_gc0_status());
+
+	return EMULATE_FAIL;
+}
+
+/**
+ * kvm_trap_vz_handle_guest_exit() - Dispatch a guest exit by its GExcCode.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Extracts the GEXC field from the saved host_cp0_guestctl0, bumps the
+ * matching exit statistic and calls the handler for that code. Codes without
+ * a dedicated handler go to kvm_trap_vz_no_handler_guest_exit().
+ *
+ * Returns:	RESUME_GUEST when emulation completed, the result of
+ *		kvm_mips_handle_hypcall() for a hypercall, otherwise RESUME_HOST
+ *		with exit_reason set to KVM_EXIT_INTERNAL_ERROR.
+ */
+static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
+{
+	enum emulation_result er;
+	u32 *opc = (u32 *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
+	u32 gexccode;
+
+	gexccode = (vcpu->arch.host_cp0_guestctl0 & MIPS_GCTL0_GEXC) >>
+		   MIPS_GCTL0_GEXC_SHIFT;
+
+	trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode);
+
+	switch (gexccode) {
+	/* Exits with a dedicated handler */
+	case MIPS_GCTL0_GEXC_GPSI:
+		++vcpu->stat.vz_gpsi_exits;
+		er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu);
+		break;
+	case MIPS_GCTL0_GEXC_GSFC:
+		++vcpu->stat.vz_gsfc_exits;
+		er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu);
+		break;
+	case MIPS_GCTL0_GEXC_HC:
+		++vcpu->stat.vz_hc_exits;
+		er = kvm_trap_vz_handle_hc(cause, opc, vcpu);
+		break;
+	case MIPS_GCTL0_GEXC_GHFC:
+		++vcpu->stat.vz_ghfc_exits;
+		er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu);
+		break;
+
+	/* Exits that are only counted and reported */
+	case MIPS_GCTL0_GEXC_GRR:
+		++vcpu->stat.vz_grr_exits;
+		goto unhandled;
+	case MIPS_GCTL0_GEXC_GVA:
+		++vcpu->stat.vz_gva_exits;
+		goto unhandled;
+	case MIPS_GCTL0_GEXC_GPA:
+		++vcpu->stat.vz_gpa_exits;
+		goto unhandled;
+	default:
+		++vcpu->stat.vz_resvd_exits;
+unhandled:
+		er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
+						       vcpu);
+		break;
+	}
+
+	switch (er) {
+	case EMULATE_DONE:
+		return RESUME_GUEST;
+	case EMULATE_HYPERCALL:
+		return kvm_mips_handle_hypcall(vcpu);
+	default:
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return RESUME_HOST;
+	}
+}
+
+/**
+ * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use a coprocessor which hasn't been allowed
+ * by the root context. Only coprocessor 1 (the FPU) is handled, by giving the
+ * guest ownership of the FPU.
+ *
+ * Returns:	RESUME_GUEST if the FPU was handed to the guest, otherwise
+ *		RESUME_HOST with exit_reason set to KVM_EXIT_INTERNAL_ERROR.
+ */
+static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	u32 cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_FAIL;
+	int ret = RESUME_GUEST;
+
+	if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+		/*
+		 * If guest FPU not present, the FPU operation should have been
+		 * treated as a reserved instruction!
+		 * If FPU already in use, we shouldn't get this at all.
+		 *
+		 * In either case leave er at EMULATE_FAIL so the common code
+		 * below reports an internal error. Preemption is not disabled
+		 * in this function, so it must not be enabled here either.
+		 */
+		if (!WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+			     vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) {
+			kvm_own_fpu(vcpu);
+			er = EMULATE_DONE;
+		}
+	}
+	/* other coprocessors not handled */
+
+	switch (er) {
+	case EMULATE_DONE:
+		ret = RESUME_GUEST;
+		break;
+
+	case EMULATE_FAIL:
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+		break;
+
+	default:
+		BUG();
+	}
+	return ret;
+}
+
+/**
+ * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Called when the guest touches MSA while the root context has it disabled.
+ * If the guest is allowed to use MSA right now, ownership is handed over.
+ *
+ * Returns:	RESUME_GUEST after giving MSA to the guest, otherwise RESUME_HOST
+ *		with exit_reason set to KVM_EXIT_INTERNAL_ERROR.
+ */
+static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+
+	/*
+	 * All of the following must hold, otherwise the access should have been
+	 * a reserved instruction or should not have trapped at all:
+	 *  - MSA is present and exposed to the guest,
+	 *  - the guest is not running with CU1=1, FR=0,
+	 *  - the guest has set Config5.MSAEn,
+	 *  - MSA state is not already live.
+	 */
+	if (kvm_mips_guest_has_msa(&vcpu->arch) &&
+	    (read_gc0_status() & (ST0_CU1 | ST0_FR)) != ST0_CU1 &&
+	    (read_gc0_config5() & MIPS_CONF5_MSAEN) &&
+	    !(vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)) {
+		kvm_own_msa(vcpu);
+		return RESUME_GUEST;
+	}
+
+	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	return RESUME_HOST;
+}
+
+/**
+ * kvm_trap_vz_handle_tlb_ld_miss() - Handle a root TLB miss on a guest load.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Tries to resolve the fault with kvm_mips_handle_vz_root_tlb_fault(). If that
+ * fails the access is treated as MMIO and the load is emulated, unless it was
+ * an instruction fetch.
+ *
+ * Returns:	RESUME_GUEST if the fault was resolved or emulation completed,
+ *		RESUME_HOST with KVM_EXIT_MMIO if userspace must do the MMIO,
+ *		RESUME_HOST with KVM_EXIT_INTERNAL_ERROR otherwise.
+ */
+static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	ulong badvaddr = vcpu->arch.host_cp0_badvaddr;
+	u32 cause = vcpu->arch.host_cp0_cause;
+	u32 *opc = (u32 *) vcpu->arch.pc;
+	union mips_instruction inst;
+	enum emulation_result er;
+
+	/* Nothing more to do if the root TLB fault could be resolved */
+	if (!kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false))
+		return RESUME_GUEST;
+
+	/* A code fetch fault doesn't count as an MMIO */
+	if (kvm_is_ifetch_fault(&vcpu->arch))
+		goto internal_error;
+
+	/* Fetch the instruction */
+	if (cause & CAUSEF_BD)
+		opc++;
+	if (kvm_get_badinstr(opc, vcpu, &inst.word))
+		goto internal_error;
+
+	/* Treat as MMIO */
+	er = kvm_mips_emulate_load(inst, cause, run, vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		return RESUME_GUEST;
+	case EMULATE_DO_MMIO:
+		run->exit_reason = KVM_EXIT_MMIO;
+		return RESUME_HOST;
+	case EMULATE_FAIL:
+		kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n",
+			opc, badvaddr);
+		goto internal_error;
+	default:
+		goto internal_error;
+	}
+
+internal_error:
+	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	return RESUME_HOST;
+}
+
+/**
+ * kvm_trap_vz_handle_tlb_st_miss() - Handle a root TLB miss on a guest store.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Translates the bad virtual address to a guest physical address, stores it
+ * back in vcpu->arch.host_cp0_badvaddr and tries to resolve the fault with
+ * kvm_mips_handle_vz_root_tlb_fault(). If that fails the access is treated as
+ * MMIO and the store is emulated.
+ *
+ * Returns:	RESUME_GUEST if the translation failed (the guest retries), the
+ *		fault was resolved or emulation completed; RESUME_HOST with
+ *		KVM_EXIT_MMIO if userspace must do the MMIO; RESUME_HOST with
+ *		KVM_EXIT_INTERNAL_ERROR otherwise.
+ */
+static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	ulong badvaddr = vcpu->arch.host_cp0_badvaddr;
+	u32 cause = vcpu->arch.host_cp0_cause;
+	u32 *opc = (u32 *) vcpu->arch.pc;
+	union mips_instruction inst;
+	enum emulation_result er;
+
+	/* Just try the access again if we couldn't do the translation */
+	if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr))
+		return RESUME_GUEST;
+	vcpu->arch.host_cp0_badvaddr = badvaddr;
+
+	/* Nothing more to do if the root TLB fault could be resolved */
+	if (!kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true))
+		return RESUME_GUEST;
+
+	/* Fetch the instruction */
+	if (cause & CAUSEF_BD)
+		opc++;
+	if (kvm_get_badinstr(opc, vcpu, &inst.word))
+		goto internal_error;
+
+	/* Treat as MMIO */
+	er = kvm_mips_emulate_store(inst, cause, run, vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		return RESUME_GUEST;
+	case EMULATE_DO_MMIO:
+		run->exit_reason = KVM_EXIT_MMIO;
+		return RESUME_HOST;
+	case EMULATE_FAIL:
+		kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n",
+			opc, badvaddr);
+		goto internal_error;
+	default:
+		goto internal_error;
+	}
+
+internal_error:
+	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	return RESUME_HOST;
+}
+
+/*
+ * Register IDs that kvm_vz_copy_reg_indices() reports unconditionally and
+ * kvm_vz_num_regs() always counts.
+ */
+static u64 kvm_vz_get_one_regs[] = {
+ KVM_REG_MIPS_CP0_INDEX,
+ KVM_REG_MIPS_CP0_ENTRYLO0,
+ KVM_REG_MIPS_CP0_ENTRYLO1,
+ KVM_REG_MIPS_CP0_CONTEXT,
+ KVM_REG_MIPS_CP0_PAGEMASK,
+ KVM_REG_MIPS_CP0_PAGEGRAIN,
+ KVM_REG_MIPS_CP0_WIRED,
+ KVM_REG_MIPS_CP0_HWRENA,
+ KVM_REG_MIPS_CP0_BADVADDR,
+ KVM_REG_MIPS_CP0_COUNT,
+ KVM_REG_MIPS_CP0_ENTRYHI,
+ KVM_REG_MIPS_CP0_COMPARE,
+ KVM_REG_MIPS_CP0_STATUS,
+ KVM_REG_MIPS_CP0_INTCTL,
+ KVM_REG_MIPS_CP0_CAUSE,
+ KVM_REG_MIPS_CP0_EPC,
+ KVM_REG_MIPS_CP0_PRID,
+ KVM_REG_MIPS_CP0_EBASE,
+ KVM_REG_MIPS_CP0_CONFIG,
+ KVM_REG_MIPS_CP0_CONFIG1,
+ KVM_REG_MIPS_CP0_CONFIG2,
+ KVM_REG_MIPS_CP0_CONFIG3,
+ KVM_REG_MIPS_CP0_CONFIG4,
+ KVM_REG_MIPS_CP0_CONFIG5,
+#ifdef CONFIG_64BIT
+ KVM_REG_MIPS_CP0_XCONTEXT,
+#endif
+ KVM_REG_MIPS_CP0_ERROREPC,
+
+ KVM_REG_MIPS_COUNT_CTL,
+ KVM_REG_MIPS_COUNT_RESUME,
+ KVM_REG_MIPS_COUNT_HZ,
+};
+
+/* Reported only when cpu_guest_has_contextconfig */
+static u64 kvm_vz_get_one_regs_contextconfig[] = {
+ KVM_REG_MIPS_CP0_CONTEXTCONFIG,
+#ifdef CONFIG_64BIT
+ KVM_REG_MIPS_CP0_XCONTEXTCONFIG,
+#endif
+};
+
+/* Reported only when cpu_guest_has_segments */
+static u64 kvm_vz_get_one_regs_segments[] = {
+ KVM_REG_MIPS_CP0_SEGCTL0,
+ KVM_REG_MIPS_CP0_SEGCTL1,
+ KVM_REG_MIPS_CP0_SEGCTL2,
+};
+
+/* Reported only when cpu_guest_has_htw */
+static u64 kvm_vz_get_one_regs_htw[] = {
+ KVM_REG_MIPS_CP0_PWBASE,
+ KVM_REG_MIPS_CP0_PWFIELD,
+ KVM_REG_MIPS_CP0_PWSIZE,
+ KVM_REG_MIPS_CP0_PWCTL,
+};
+
+/*
+ * Entry i is reported only when cpu_guest_has_kscr(i + 2), so the order here
+ * must stay KScratch1..KScratch6.
+ */
+static u64 kvm_vz_get_one_regs_kscratch[] = {
+ KVM_REG_MIPS_CP0_KSCRATCH1,
+ KVM_REG_MIPS_CP0_KSCRATCH2,
+ KVM_REG_MIPS_CP0_KSCRATCH3,
+ KVM_REG_MIPS_CP0_KSCRATCH4,
+ KVM_REG_MIPS_CP0_KSCRATCH5,
+ KVM_REG_MIPS_CP0_KSCRATCH6,
+};
+
+/**
+ * kvm_vz_num_regs() - Count the register IDs reported for a VCPU.
+ * @vcpu:	Virtual CPU context (only used for the size of arch.maar).
+ *
+ * Returns:	The size of the fixed register list plus one entry for every
+ *		optional register the guest has.
+ */
+static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu)
+{
+	unsigned long total = ARRAY_SIZE(kvm_vz_get_one_regs);
+
+	/* Optional registers that are reported one at a time */
+	if (cpu_guest_has_userlocal)
+		total++;
+	if (cpu_guest_has_badinstr)
+		total++;
+	if (cpu_guest_has_badinstrp)
+		total++;
+
+	/* Optional registers that are reported as whole groups */
+	if (cpu_guest_has_contextconfig)
+		total += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
+	if (cpu_guest_has_segments)
+		total += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
+	if (cpu_guest_has_htw)
+		total += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
+
+	/* Every MAAR slot plus MAARI, only without dynamic MAAR */
+	if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar)
+		total += ARRAY_SIZE(vcpu->arch.maar) + 1;
+
+	/* One per KScratch register set in the guest's mask */
+	total += __arch_hweight8(cpu_data[0].guest.kscratch_mask);
+
+	return total;
+}
+
+/**
+ * kvm_vz_copy_reg_indices() - Copy the supported register IDs to userspace.
+ * @vcpu:	Virtual CPU context (only used for the size of arch.maar).
+ * @indices:	User buffer receiving consecutive u64 register IDs.
+ *
+ * Writes the fixed register list followed by the ID of every optional register
+ * the guest has, in the same order kvm_vz_num_regs() counts them.
+ *
+ * Returns:	0 on success, -EFAULT if a copy to userspace fails.
+ */
+static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
+{
+	u64 __user *out = indices;
+	unsigned int n;
+	u64 id;
+
+	/* Registers that are always reported */
+	if (copy_to_user(out, kvm_vz_get_one_regs,
+			 sizeof(kvm_vz_get_one_regs)))
+		return -EFAULT;
+	out += ARRAY_SIZE(kvm_vz_get_one_regs);
+
+	/* Optional single registers */
+	if (cpu_guest_has_userlocal) {
+		id = KVM_REG_MIPS_CP0_USERLOCAL;
+		if (copy_to_user(out++, &id, sizeof(id)))
+			return -EFAULT;
+	}
+	if (cpu_guest_has_badinstr) {
+		id = KVM_REG_MIPS_CP0_BADINSTR;
+		if (copy_to_user(out++, &id, sizeof(id)))
+			return -EFAULT;
+	}
+	if (cpu_guest_has_badinstrp) {
+		id = KVM_REG_MIPS_CP0_BADINSTRP;
+		if (copy_to_user(out++, &id, sizeof(id)))
+			return -EFAULT;
+	}
+
+	/* Optional register groups */
+	if (cpu_guest_has_contextconfig) {
+		if (copy_to_user(out, kvm_vz_get_one_regs_contextconfig,
+				 sizeof(kvm_vz_get_one_regs_contextconfig)))
+			return -EFAULT;
+		out += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
+	}
+	if (cpu_guest_has_segments) {
+		if (copy_to_user(out, kvm_vz_get_one_regs_segments,
+				 sizeof(kvm_vz_get_one_regs_segments)))
+			return -EFAULT;
+		out += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
+	}
+	if (cpu_guest_has_htw) {
+		if (copy_to_user(out, kvm_vz_get_one_regs_htw,
+				 sizeof(kvm_vz_get_one_regs_htw)))
+			return -EFAULT;
+		out += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
+	}
+
+	/* Every MAAR slot followed by MAARI, only without dynamic MAAR */
+	if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) {
+		for (n = 0; n < ARRAY_SIZE(vcpu->arch.maar); ++n) {
+			id = KVM_REG_MIPS_CP0_MAAR(n);
+			if (copy_to_user(out++, &id, sizeof(id)))
+				return -EFAULT;
+		}
+
+		id = KVM_REG_MIPS_CP0_MAARI;
+		if (copy_to_user(out++, &id, sizeof(id)))
+			return -EFAULT;
+	}
+
+	/* KScratch1..6 correspond to kscr numbers 2..7 */
+	for (n = 0; n < 6; ++n) {
+		if (cpu_guest_has_kscr(n + 2) &&
+		    copy_to_user(out++, &kvm_vz_get_one_regs_kscratch[n],
+				 sizeof(kvm_vz_get_one_regs_kscratch[n])))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Convert a native EntryLo value to the 64-bit layout used by the KVM API.
+ * On 64-bit kernels the value is passed through unchanged.
+ */
+static inline s64 entrylo_kvm_to_user(unsigned long v)
+{
+	s64 wide = v;
+	s64 rixi;
+
+	if (BITS_PER_LONG != 32)
+		return wide;
+
+	/*
+	 * The KVM API always uses the 64-bit register layout, so on a 32-bit
+	 * kernel the RI/XI bits have to be shifted up by 32.
+	 */
+	rixi = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI;
+	return (wide & ~rixi) | ((wide & rixi) << 32);
+}
+
+/*
+ * Convert an EntryLo value in the 64-bit KVM API layout to the native layout.
+ * On 64-bit kernels the value is passed through unchanged.
+ */
+static inline unsigned long entrylo_user_to_kvm(s64 v)
+{
+	unsigned long low = v;
+	unsigned long rixi;
+
+	if (BITS_PER_LONG != 32)
+		return low;
+
+	/*
+	 * The KVM API always uses the 64-bit register layout, so on a 32-bit
+	 * kernel the RI/XI bits have to be shifted down by 32.
+	 */
+	rixi = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI;
+	return (low & ~rixi) | ((v >> 32) & rixi);
+}
+
+/**
+ * kvm_vz_get_one_reg() - Read one guest register by KVM register ID.
+ * @vcpu: Virtual CPU context.
+ * @reg: Register descriptor; only reg->id is examined here.
+ * @v: Output for the register value.
+ *
+ * Most CP0 registers are read with the read_gc0_*() accessors. The exceptions
+ * visible below: Count uses kvm_mips_read_count(), EBase uses
+ * kvm_vz_read_gc0_ebase(), and PRId (other than on Octeon III), MAAR, MAARI
+ * and the KVM_REG_MIPS_COUNT_* registers are read from fields of @vcpu.
+ *
+ * Returns: 0 on success, -EINVAL if the register ID is not handled or the
+ * guest does not have the corresponding register.
+ */
+static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 *v)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned int idx;
+
+ switch (reg->id) {
+ case KVM_REG_MIPS_CP0_INDEX:
+ *v = (long)read_gc0_index();
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO0:
+ *v = entrylo_kvm_to_user(read_gc0_entrylo0());
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO1:
+ *v = entrylo_kvm_to_user(read_gc0_entrylo1());
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXT:
+ *v = (long)read_gc0_context();
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ *v = read_gc0_contextconfig();
+ break;
+ case KVM_REG_MIPS_CP0_USERLOCAL:
+ if (!cpu_guest_has_userlocal)
+ return -EINVAL;
+ *v = read_gc0_userlocal();
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ *v = read_gc0_xcontextconfig();
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_PAGEMASK:
+ *v = (long)read_gc0_pagemask();
+ break;
+ case KVM_REG_MIPS_CP0_PAGEGRAIN:
+ *v = (long)read_gc0_pagegrain();
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL0:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ *v = read_gc0_segctl0();
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL1:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ *v = read_gc0_segctl1();
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL2:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ *v = read_gc0_segctl2();
+ break;
+ case KVM_REG_MIPS_CP0_PWBASE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwbase();
+ break;
+ case KVM_REG_MIPS_CP0_PWFIELD:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwfield();
+ break;
+ case KVM_REG_MIPS_CP0_PWSIZE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwsize();
+ break;
+ case KVM_REG_MIPS_CP0_WIRED:
+ *v = (long)read_gc0_wired();
+ break;
+ case KVM_REG_MIPS_CP0_PWCTL:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwctl();
+ break;
+ case KVM_REG_MIPS_CP0_HWRENA:
+ *v = (long)read_gc0_hwrena();
+ break;
+ case KVM_REG_MIPS_CP0_BADVADDR:
+ *v = (long)read_gc0_badvaddr();
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTR:
+ if (!cpu_guest_has_badinstr)
+ return -EINVAL;
+ *v = read_gc0_badinstr();
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTRP:
+ if (!cpu_guest_has_badinstrp)
+ return -EINVAL;
+ *v = read_gc0_badinstrp();
+ break;
+ case KVM_REG_MIPS_CP0_COUNT:
+ *v = kvm_mips_read_count(vcpu);
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYHI:
+ *v = (long)read_gc0_entryhi();
+ break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+ *v = (long)read_gc0_compare();
+ break;
+ case KVM_REG_MIPS_CP0_STATUS:
+ *v = (long)read_gc0_status();
+ break;
+ case KVM_REG_MIPS_CP0_INTCTL:
+ *v = read_gc0_intctl();
+ break;
+ case KVM_REG_MIPS_CP0_CAUSE:
+ *v = (long)read_gc0_cause();
+ break;
+ case KVM_REG_MIPS_CP0_EPC:
+ *v = (long)read_gc0_epc();
+ break;
+ case KVM_REG_MIPS_CP0_PRID:
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Octeon III has a read-only guest.PRid */
+ *v = read_gc0_prid();
+ break;
+ default:
+ *v = (long)kvm_read_c0_guest_prid(cop0);
+ break;
+ };
+ break;
+ case KVM_REG_MIPS_CP0_EBASE:
+ *v = kvm_vz_read_gc0_ebase();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG:
+ *v = read_gc0_config();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG1:
+ if (!cpu_guest_has_conf1)
+ return -EINVAL;
+ *v = read_gc0_config1();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG2:
+ if (!cpu_guest_has_conf2)
+ return -EINVAL;
+ *v = read_gc0_config2();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG3:
+ if (!cpu_guest_has_conf3)
+ return -EINVAL;
+ *v = read_gc0_config3();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ if (!cpu_guest_has_conf4)
+ return -EINVAL;
+ *v = read_gc0_config4();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG5:
+ if (!cpu_guest_has_conf5)
+ return -EINVAL;
+ *v = read_gc0_config5();
+ break;
+ case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ /* The ID range is wider than the array, so bound the index */
+ idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0);
+ if (idx >= ARRAY_SIZE(vcpu->arch.maar))
+ return -EINVAL;
+ *v = vcpu->arch.maar[idx];
+ break;
+ case KVM_REG_MIPS_CP0_MAARI:
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0);
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXT:
+ *v = read_gc0_xcontext();
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_ERROREPC:
+ *v = (long)read_gc0_errorepc();
+ break;
+ case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
+ /* KScratch1..6 map to kscr numbers 2..7 */
+ idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
+ if (!cpu_guest_has_kscr(idx))
+ return -EINVAL;
+ switch (idx) {
+ case 2:
+ *v = (long)read_gc0_kscratch1();
+ break;
+ case 3:
+ *v = (long)read_gc0_kscratch2();
+ break;
+ case 4:
+ *v = (long)read_gc0_kscratch3();
+ break;
+ case 5:
+ *v = (long)read_gc0_kscratch4();
+ break;
+ case 6:
+ *v = (long)read_gc0_kscratch5();
+ break;
+ case 7:
+ *v = (long)read_gc0_kscratch6();
+ break;
+ }
+ break;
+ case KVM_REG_MIPS_COUNT_CTL:
+ *v = vcpu->arch.count_ctl;
+ break;
+ case KVM_REG_MIPS_COUNT_RESUME:
+ *v = ktime_to_ns(vcpu->arch.count_resume);
+ break;
+ case KVM_REG_MIPS_COUNT_HZ:
+ *v = vcpu->arch.count_hz;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * kvm_vz_set_one_reg() - Write one guest register by KVM register ID.
+ * @vcpu: Virtual CPU context.
+ * @reg: Register descriptor; only reg->id is examined here.
+ * @v: New register value.
+ *
+ * Most CP0 registers are written with the write_gc0_*() accessors. Config
+ * registers only change in the bits allowed by the matching
+ * kvm_vz_config*_user_wrmask(). Unlike kvm_vz_get_one_reg(), a write to a
+ * Config1..Config5 register the guest lacks is ignored and reported as
+ * success, as is a write to PRId on Octeon III.
+ *
+ * Returns: 0 on success, -EINVAL if the register ID is not handled or the
+ * guest does not have the corresponding register, or the result of the
+ * kvm_mips_set_count_*() helper for the KVM_REG_MIPS_COUNT_* registers.
+ */
+static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 v)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned int idx;
+ int ret = 0;
+ unsigned int cur, change;
+
+ switch (reg->id) {
+ case KVM_REG_MIPS_CP0_INDEX:
+ write_gc0_index(v);
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO0:
+ write_gc0_entrylo0(entrylo_user_to_kvm(v));
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO1:
+ write_gc0_entrylo1(entrylo_user_to_kvm(v));
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXT:
+ write_gc0_context(v);
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ write_gc0_contextconfig(v);
+ break;
+ case KVM_REG_MIPS_CP0_USERLOCAL:
+ if (!cpu_guest_has_userlocal)
+ return -EINVAL;
+ write_gc0_userlocal(v);
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ write_gc0_xcontextconfig(v);
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_PAGEMASK:
+ write_gc0_pagemask(v);
+ break;
+ case KVM_REG_MIPS_CP0_PAGEGRAIN:
+ write_gc0_pagegrain(v);
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL0:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ write_gc0_segctl0(v);
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL1:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ write_gc0_segctl1(v);
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL2:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ write_gc0_segctl2(v);
+ break;
+ case KVM_REG_MIPS_CP0_PWBASE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwbase(v);
+ break;
+ case KVM_REG_MIPS_CP0_PWFIELD:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwfield(v);
+ break;
+ case KVM_REG_MIPS_CP0_PWSIZE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwsize(v);
+ break;
+ case KVM_REG_MIPS_CP0_WIRED:
+ /* Only the MIPSR6_WIRED_WIRED field is updated */
+ change_gc0_wired(MIPSR6_WIRED_WIRED, v);
+ break;
+ case KVM_REG_MIPS_CP0_PWCTL:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwctl(v);
+ break;
+ case KVM_REG_MIPS_CP0_HWRENA:
+ write_gc0_hwrena(v);
+ break;
+ case KVM_REG_MIPS_CP0_BADVADDR:
+ write_gc0_badvaddr(v);
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTR:
+ if (!cpu_guest_has_badinstr)
+ return -EINVAL;
+ write_gc0_badinstr(v);
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTRP:
+ if (!cpu_guest_has_badinstrp)
+ return -EINVAL;
+ write_gc0_badinstrp(v);
+ break;
+ case KVM_REG_MIPS_CP0_COUNT:
+ kvm_mips_write_count(vcpu, v);
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYHI:
+ write_gc0_entryhi(v);
+ break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+ kvm_mips_write_compare(vcpu, v, false);
+ break;
+ case KVM_REG_MIPS_CP0_STATUS:
+ write_gc0_status(v);
+ break;
+ case KVM_REG_MIPS_CP0_INTCTL:
+ write_gc0_intctl(v);
+ break;
+ case KVM_REG_MIPS_CP0_CAUSE:
+ /*
+ * If the timer is stopped or started (DC bit) it must look
+ * atomic with changes to the timer interrupt pending bit (TI).
+ * A timer interrupt should not happen in between.
+ */
+ if ((read_gc0_cause() ^ v) & CAUSEF_DC) {
+ if (v & CAUSEF_DC) {
+ /* disable timer first */
+ kvm_mips_count_disable_cause(vcpu);
+ change_gc0_cause((u32)~CAUSEF_DC, v);
+ } else {
+ /* enable timer last */
+ change_gc0_cause((u32)~CAUSEF_DC, v);
+ kvm_mips_count_enable_cause(vcpu);
+ }
+ } else {
+ write_gc0_cause(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_EPC:
+ write_gc0_epc(v);
+ break;
+ case KVM_REG_MIPS_CP0_PRID:
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Octeon III has a guest.PRid, but its read-only */
+ break;
+ default:
+ kvm_write_c0_guest_prid(cop0, v);
+ break;
+ };
+ break;
+ case KVM_REG_MIPS_CP0_EBASE:
+ kvm_vz_write_gc0_ebase(v);
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG:
+ /* Flip only the bits userspace is allowed to change */
+ cur = read_gc0_config();
+ change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG1:
+ /* A missing register is ignored here, not rejected */
+ if (!cpu_guest_has_conf1)
+ break;
+ cur = read_gc0_config1();
+ change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config1(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG2:
+ if (!cpu_guest_has_conf2)
+ break;
+ cur = read_gc0_config2();
+ change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config2(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG3:
+ if (!cpu_guest_has_conf3)
+ break;
+ cur = read_gc0_config3();
+ change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config3(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ if (!cpu_guest_has_conf4)
+ break;
+ cur = read_gc0_config4();
+ change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config4(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG5:
+ if (!cpu_guest_has_conf5)
+ break;
+ cur = read_gc0_config5();
+ change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config5(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ /* The ID range is wider than the array, so bound the index */
+ idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0);
+ if (idx >= ARRAY_SIZE(vcpu->arch.maar))
+ return -EINVAL;
+ vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v);
+ break;
+ case KVM_REG_MIPS_CP0_MAARI:
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ kvm_write_maari(vcpu, v);
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXT:
+ write_gc0_xcontext(v);
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_ERROREPC:
+ write_gc0_errorepc(v);
+ break;
+ case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
+ /* KScratch1..6 map to kscr numbers 2..7 */
+ idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
+ if (!cpu_guest_has_kscr(idx))
+ return -EINVAL;
+ switch (idx) {
+ case 2:
+ write_gc0_kscratch1(v);
+ break;
+ case 3:
+ write_gc0_kscratch2(v);
+ break;
+ case 4:
+ write_gc0_kscratch3(v);
+ break;
+ case 5:
+ write_gc0_kscratch4(v);
+ break;
+ case 6:
+ write_gc0_kscratch5(v);
+ break;
+ case 7:
+ write_gc0_kscratch6(v);
+ break;
+ }
+ break;
+ case KVM_REG_MIPS_COUNT_CTL:
+ ret = kvm_mips_set_count_ctl(vcpu, v);
+ break;
+ case KVM_REG_MIPS_COUNT_RESUME:
+ ret = kvm_mips_set_count_resume(vcpu, v);
+ break;
+ case KVM_REG_MIPS_COUNT_HZ:
+ ret = kvm_mips_set_count_hz(vcpu, v);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
+/* Per-CPU GuestID counter, kept in cpu_data[] */
+#define guestid_cache(cpu) (cpu_data[cpu].guestid_cache)
+/**
+ * kvm_vz_get_new_guestid() - Advance the GuestID counter of a CPU.
+ * @cpu: CPU whose guestid_cache() is advanced.
+ * @vcpu: Virtual CPU context (not used in this function).
+ *
+ * Increments guestid_cache(@cpu). When the GUESTID_MASK bits wrap to zero a new
+ * cycle is started: the icache is flushed if cpu_has_vtag_icache, an all-zero
+ * counter is replaced by GUESTID_FIRST_VERSION, the value is bumped once more
+ * so that the GuestID field is not 0, and the root TLB entries for guests and
+ * the guest TLB are flushed locally.
+ *
+ * Nothing is returned; the new value is left in guestid_cache(@cpu).
+ */
+static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu)
+{
+ unsigned long guestid = guestid_cache(cpu);
+
+ if (!(++guestid & GUESTID_MASK)) {
+ if (cpu_has_vtag_icache)
+ flush_icache_all();
+
+ if (!guestid) /* fix version if needed */
+ guestid = GUESTID_FIRST_VERSION;
+
+ ++guestid; /* guestid 0 reserved for root */
+
+ /* start new guestid cycle */
+ kvm_vz_local_flush_roottlb_all_guests();
+ kvm_vz_local_flush_guesttlb_all();
+ }
+
+ guestid_cache(cpu) = guestid;
+}
+
+/**
+ * kvm_vz_check_requests() - Process pending VCPU requests.
+ * @vcpu:	Virtual CPU context.
+ * @cpu:	Current CPU (not used in this function).
+ *
+ * Only KVM_REQ_TLB_FLUSH is handled. With GuestIDs, every per-CPU GuestID of
+ * the VCPU is dropped.
+ *
+ * Returns:	1 if the guest TLB may be clobbered, 0 otherwise.
+ */
+static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
+{
+	int c;
+
+	if (!vcpu->requests)
+		return 0;
+
+	if (!kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+		return 0;
+
+	/*
+	 * For Root ASID Dealias (RAD) nothing is done here, but the request is
+	 * still needed so that asid_flush_mask gets rechecked. Returning 0 is
+	 * fine in that case as a root ASID flush only affects the root TLB.
+	 */
+	if (!cpu_has_guestid)
+		return 0;
+
+	/* Drop all GuestIDs for this VCPU */
+	for_each_possible_cpu(c)
+		vcpu->arch.vzguestid[c] = 0;
+
+	/* This will clobber guest TLB contents too */
+	return 1;
+}
+
+/**
+ * kvm_vz_vcpu_save_wired() - Save the guest's wired TLB entries.
+ * @vcpu: Virtual CPU context.
+ *
+ * Reads the wired count from guest CP0_Wired, grows vcpu->arch.wired_tlb if it
+ * is too small (saving only as many entries as fit if that fails), saves
+ * entries [0, wired) from the guest TLB, and invalidates any previously saved
+ * entries beyond the new count.
+ */
+static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu)
+{
+ unsigned int wired = read_gc0_wired();
+ struct kvm_mips_tlb *tlbs;
+ int i;
+
+ /* Expand the wired TLB array if necessary */
+ wired &= MIPSR6_WIRED_WIRED;
+ if (wired > vcpu->arch.wired_tlb_limit) {
+ /* NOTE(review): GFP_ATOMIC suggests callers may not sleep - confirm */
+ tlbs = krealloc(vcpu->arch.wired_tlb, wired *
+ sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC);
+ if (WARN_ON(!tlbs)) {
+ /* Save whatever we can */
+ wired = vcpu->arch.wired_tlb_limit;
+ } else {
+ vcpu->arch.wired_tlb = tlbs;
+ vcpu->arch.wired_tlb_limit = wired;
+ }
+ }
+
+ if (wired)
+ /* Save wired entries from the guest TLB */
+ kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired);
+ /* Invalidate any dropped entries since last time */
+ for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) {
+ vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
+ vcpu->arch.wired_tlb[i].tlb_lo[0] = 0;
+ vcpu->arch.wired_tlb[i].tlb_lo[1] = 0;
+ vcpu->arch.wired_tlb[i].tlb_mask = 0;
+ }
+ vcpu->arch.wired_tlb_used = wired;
+}
+
+/**
+ * kvm_vz_vcpu_load_wired() - Reload the saved wired entries into the guest TLB.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Does nothing if no wired TLB array has been allocated for the VCPU.
+ */
+static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mips_tlb *saved = vcpu->arch.wired_tlb;
+
+	if (!saved)
+		return;
+
+	/* Entries [0, wired_tlb_used) go back into the guest TLB */
+	kvm_vz_load_guesttlb(saved, 0, vcpu->arch.wired_tlb_used);
+}
+
+/**
+ * kvm_vz_vcpu_load_tlb() - Prepare TLB identifiers for entering the guest.
+ * @vcpu: Virtual CPU context.
+ * @cpu: CPU the VCPU is about to execute on.
+ *
+ * Records @cpu in vcpu->arch.last_exec_cpu. With GuestIDs, a fresh GuestID is
+ * allocated when the VCPU last executed on another CPU or its stored GuestID
+ * belongs to an older version, and the GuestID field of GuestCtl1 is then
+ * updated. Without GuestIDs, the guest TLB is flushed if the VCPU migrated or
+ * another VCPU was the last to execute on @cpu, and a new root ASID is taken
+ * for the GPA mm when a flush was requested for @cpu or the ASID version is
+ * stale.
+ */
+static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct mm_struct *gpa_mm = &kvm->arch.gpa_mm;
+ bool migrated;
+
+ /*
+ * Are we entering guest context on a different CPU to last time?
+ * If so, the VCPU's guest TLB state on this CPU may be stale.
+ */
+ migrated = (vcpu->arch.last_exec_cpu != cpu);
+ vcpu->arch.last_exec_cpu = cpu;
+
+ /*
+ * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and
+ * remains set until another vcpu is loaded in. As a rule GuestRID
+ * remains zeroed when in root context unless the kernel is busy
+ * manipulating guest tlb entries.
+ */
+ if (cpu_has_guestid) {
+ /*
+ * Check if our GuestID is of an older version and thus invalid.
+ *
+ * We also discard the stored GuestID if we've executed on
+ * another CPU, as the guest mappings may have changed without
+ * hypervisor knowledge.
+ */
+ if (migrated ||
+ (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) &
+ GUESTID_VERSION_MASK) {
+ kvm_vz_get_new_guestid(cpu, vcpu);
+ vcpu->arch.vzguestid[cpu] = guestid_cache(cpu);
+ trace_kvm_guestid_change(vcpu,
+ vcpu->arch.vzguestid[cpu]);
+ }
+
+ /* Restore GuestID */
+ change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]);
+ } else {
+ /*
+ * The Guest TLB only stores a single guest's TLB state, so
+ * flush it if another VCPU has executed on this CPU.
+ *
+ * We also flush if we've executed on another CPU, as the guest
+ * mappings may have changed without hypervisor knowledge.
+ */
+ if (migrated || last_exec_vcpu[cpu] != vcpu)
+ kvm_vz_local_flush_guesttlb_all();
+ last_exec_vcpu[cpu] = vcpu;
+
+ /*
+ * Root ASID dealiases guest GPA mappings in the root TLB.
+ * Allocate new root ASID if needed.
+ */
+ if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask)
+ || (cpu_context(cpu, gpa_mm) ^ asid_cache(cpu)) &
+ asid_version_mask(cpu))
+ get_new_mmu_context(gpa_mm, cpu);
+ }
+}
+
+/**
+ * kvm_vz_vcpu_load() - Restore guest CP0 state for @vcpu on @cpu.
+ * @vcpu:	VCPU whose guest context is being loaded.
+ * @cpu:	CPU the VCPU is being loaded onto.
+ *
+ * Guest Wired, the guest TLB state (only when current has PF_VCPU set), the
+ * timer and the GuestCtl0.MC trace bit are handled on every call. The
+ * remaining guest CP0 registers are only written back when @cpu differs from
+ * vcpu->arch.last_sched_cpu, or when a different VCPU was the last one loaded
+ * on @cpu (tracked in last_vcpu[]).
+ *
+ * Returns:	0 in all cases.
+ */
+static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	bool migrated, all;
+
+	/*
+	 * Have we migrated to a different CPU?
+	 * If so, any old guest TLB state may be stale.
+	 */
+	migrated = (vcpu->arch.last_sched_cpu != cpu);
+
+	/*
+	 * Was this the last VCPU to run on this CPU?
+	 * If not, any old guest state from this VCPU will have been clobbered.
+	 */
+	all = migrated || (last_vcpu[cpu] != vcpu);
+	last_vcpu[cpu] = vcpu;
+
+	/*
+	 * Restore CP0_Wired unconditionally as we clear it after use, and
+	 * restore wired guest TLB entries (while in guest context).
+	 */
+	kvm_restore_gc0_wired(cop0);
+	if (current->flags & PF_VCPU) {
+		tlbw_use_hazard();
+		kvm_vz_vcpu_load_tlb(vcpu, cpu);
+		kvm_vz_vcpu_load_wired(vcpu);
+	}
+
+	/*
+	 * Restore timer state regardless, as e.g. Cause.TI can change over time
+	 * if left unmaintained.
+	 */
+	kvm_vz_restore_timer(vcpu);
+
+	/* Set MC bit if we want to trace guest mode changes */
+	if (kvm_trace_guest_mode_change)
+		set_c0_guestctl0(MIPS_GCTL0_MC);
+	else
+		clear_c0_guestctl0(MIPS_GCTL0_MC);
+
+	/* Don't bother restoring registers multiple times unless necessary */
+	if (!all)
+		return 0;
+
+	/*
+	 * Restore config registers first, as some implementations restrict
+	 * writes to other registers when the corresponding feature bits aren't
+	 * set. For example Status.CU1 cannot be set unless Config1.FP is set.
+	 */
+	kvm_restore_gc0_config(cop0);
+	if (cpu_guest_has_conf1)
+		kvm_restore_gc0_config1(cop0);
+	if (cpu_guest_has_conf2)
+		kvm_restore_gc0_config2(cop0);
+	if (cpu_guest_has_conf3)
+		kvm_restore_gc0_config3(cop0);
+	if (cpu_guest_has_conf4)
+		kvm_restore_gc0_config4(cop0);
+	if (cpu_guest_has_conf5)
+		kvm_restore_gc0_config5(cop0);
+	if (cpu_guest_has_conf6)
+		kvm_restore_gc0_config6(cop0);
+	if (cpu_guest_has_conf7)
+		kvm_restore_gc0_config7(cop0);
+
+	kvm_restore_gc0_index(cop0);
+	kvm_restore_gc0_entrylo0(cop0);
+	kvm_restore_gc0_entrylo1(cop0);
+	kvm_restore_gc0_context(cop0);
+	if (cpu_guest_has_contextconfig)
+		kvm_restore_gc0_contextconfig(cop0);
+#ifdef CONFIG_64BIT
+	kvm_restore_gc0_xcontext(cop0);
+	if (cpu_guest_has_contextconfig)
+		kvm_restore_gc0_xcontextconfig(cop0);
+#endif
+	kvm_restore_gc0_pagemask(cop0);
+	kvm_restore_gc0_pagegrain(cop0);
+	kvm_restore_gc0_hwrena(cop0);
+	kvm_restore_gc0_badvaddr(cop0);
+	kvm_restore_gc0_entryhi(cop0);
+	kvm_restore_gc0_status(cop0);
+	kvm_restore_gc0_intctl(cop0);
+	kvm_restore_gc0_epc(cop0);
+	kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0));
+	if (cpu_guest_has_userlocal)
+		kvm_restore_gc0_userlocal(cop0);
+
+	kvm_restore_gc0_errorepc(cop0);
+
+	/* restore KScratch registers if enabled in guest */
+	if (cpu_guest_has_conf4) {
+		if (cpu_guest_has_kscr(2))
+			kvm_restore_gc0_kscratch1(cop0);
+		if (cpu_guest_has_kscr(3))
+			kvm_restore_gc0_kscratch2(cop0);
+		if (cpu_guest_has_kscr(4))
+			kvm_restore_gc0_kscratch3(cop0);
+		if (cpu_guest_has_kscr(5))
+			kvm_restore_gc0_kscratch4(cop0);
+		if (cpu_guest_has_kscr(6))
+			kvm_restore_gc0_kscratch5(cop0);
+		if (cpu_guest_has_kscr(7))
+			kvm_restore_gc0_kscratch6(cop0);
+	}
+
+	if (cpu_guest_has_badinstr)
+		kvm_restore_gc0_badinstr(cop0);
+	if (cpu_guest_has_badinstrp)
+		kvm_restore_gc0_badinstrp(cop0);
+
+	if (cpu_guest_has_segments) {
+		kvm_restore_gc0_segctl0(cop0);
+		kvm_restore_gc0_segctl1(cop0);
+		kvm_restore_gc0_segctl2(cop0);
+	}
+
+	/*
+	 * restore HTW registers if enabled in guest. kvm_vz_vcpu_put() only
+	 * saves them while the software copy of Config3 has PW set, so the
+	 * same condition is applied here to keep load and put symmetric.
+	 */
+	if (cpu_guest_has_htw &&
+	    kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) {
+		kvm_restore_gc0_pwbase(cop0);
+		kvm_restore_gc0_pwfield(cop0);
+		kvm_restore_gc0_pwsize(cop0);
+		kvm_restore_gc0_pwctl(cop0);
+	}
+
+	/* restore Root.GuestCtl2 from unused Guest guestctl2 register */
+	if (cpu_has_guestctl2)
+		write_c0_guestctl2(
+			cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]);
+
+	/*
+	 * We should clear linked load bit to break interrupted atomics. This
+	 * prevents a SC on the next VCPU from succeeding by matching a LL on
+	 * the previous VCPU.
+	 */
+	if (cpu_guest_has_rw_llb)
+		write_gc0_lladdr(0);
+
+	return 0;
+}
+
+/**
+ * kvm_vz_vcpu_put() - Save guest CP0 state of @vcpu.
+ * @vcpu: VCPU whose guest context is being saved.
+ * @cpu: Not referenced by this function.
+ *
+ * Saves wired guest TLB entries (only if current has PF_VCPU set), calls
+ * kvm_lose_fpu(), then saves the guest CP0 registers through the
+ * kvm_save_gc0_*() helpers operating on vcpu->arch.cop0, followed by the
+ * timer state and Root.GuestCtl2. Optional registers are only saved when the
+ * matching cpu_guest_has_*() / cpu_has_*() check passes.
+ *
+ * Returns: 0 in all cases.
+ */
+static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+ if (current->flags & PF_VCPU)
+ kvm_vz_vcpu_save_wired(vcpu);
+
+ kvm_lose_fpu(vcpu);
+
+ kvm_save_gc0_index(cop0);
+ kvm_save_gc0_entrylo0(cop0);
+ kvm_save_gc0_entrylo1(cop0);
+ kvm_save_gc0_context(cop0);
+ if (cpu_guest_has_contextconfig)
+ kvm_save_gc0_contextconfig(cop0);
+#ifdef CONFIG_64BIT
+ kvm_save_gc0_xcontext(cop0);
+ if (cpu_guest_has_contextconfig)
+ kvm_save_gc0_xcontextconfig(cop0);
+#endif
+ kvm_save_gc0_pagemask(cop0);
+ kvm_save_gc0_pagegrain(cop0);
+ /* Wired must be saved before its Wired field is cleared just below */
+ kvm_save_gc0_wired(cop0);
+ /* allow wired TLB entries to be overwritten */
+ clear_gc0_wired(MIPSR6_WIRED_WIRED);
+ kvm_save_gc0_hwrena(cop0);
+ kvm_save_gc0_badvaddr(cop0);
+ kvm_save_gc0_entryhi(cop0);
+ kvm_save_gc0_status(cop0);
+ kvm_save_gc0_intctl(cop0);
+ kvm_save_gc0_epc(cop0);
+ /* EBase is read via kvm_vz_read_gc0_ebase(), not a kvm_save_gc0_*() helper */
+ kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase());
+ if (cpu_guest_has_userlocal)
+ kvm_save_gc0_userlocal(cop0);
+
+ /* only save implemented config registers */
+ kvm_save_gc0_config(cop0);
+ if (cpu_guest_has_conf1)
+ kvm_save_gc0_config1(cop0);
+ if (cpu_guest_has_conf2)
+ kvm_save_gc0_config2(cop0);
+ if (cpu_guest_has_conf3)
+ kvm_save_gc0_config3(cop0);
+ if (cpu_guest_has_conf4)
+ kvm_save_gc0_config4(cop0);
+ if (cpu_guest_has_conf5)
+ kvm_save_gc0_config5(cop0);
+ if (cpu_guest_has_conf6)
+ kvm_save_gc0_config6(cop0);
+ if (cpu_guest_has_conf7)
+ kvm_save_gc0_config7(cop0);
+
+ kvm_save_gc0_errorepc(cop0);
+
+ /* save KScratch registers if enabled in guest */
+ if (cpu_guest_has_conf4) {
+ if (cpu_guest_has_kscr(2))
+ kvm_save_gc0_kscratch1(cop0);
+ if (cpu_guest_has_kscr(3))
+ kvm_save_gc0_kscratch2(cop0);
+ if (cpu_guest_has_kscr(4))
+ kvm_save_gc0_kscratch3(cop0);
+ if (cpu_guest_has_kscr(5))
+ kvm_save_gc0_kscratch4(cop0);
+ if (cpu_guest_has_kscr(6))
+ kvm_save_gc0_kscratch5(cop0);
+ if (cpu_guest_has_kscr(7))
+ kvm_save_gc0_kscratch6(cop0);
+ }
+
+ if (cpu_guest_has_badinstr)
+ kvm_save_gc0_badinstr(cop0);
+ if (cpu_guest_has_badinstrp)
+ kvm_save_gc0_badinstrp(cop0);
+
+ if (cpu_guest_has_segments) {
+ kvm_save_gc0_segctl0(cop0);
+ kvm_save_gc0_segctl1(cop0);
+ kvm_save_gc0_segctl2(cop0);
+ }
+
+ /*
+ * save HTW registers if enabled in guest, i.e. only while the Config3
+ * value saved above has the PW bit set
+ */
+ if (cpu_guest_has_htw &&
+ kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) {
+ kvm_save_gc0_pwbase(cop0);
+ kvm_save_gc0_pwfield(cop0);
+ kvm_save_gc0_pwsize(cop0);
+ kvm_save_gc0_pwctl(cop0);
+ }
+
+ kvm_vz_save_timer(vcpu);
+
+ /* save Root.GuestCtl2 in unused Guest guestctl2 register */
+ if (cpu_has_guestctl2)
+ cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] =
+ read_c0_guestctl2();
+
+ return 0;
+}
+
+/**
+ * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB.
+ * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries).
+ *
+ * Attempt to resize the guest VTLB by writing guest Config registers. This is
+ * necessary for cores with a shared root/guest TLB to avoid overlap with wired
+ * entries in the root VTLB.
+ *
+ * The low bits of @size - 1 go into Guest.Config1.TLBS; the bits above
+ * MIPS_CONF1_TLBS_SIZE go into the Config4 VTLBSizeExt or MMUSizeExt field,
+ * depending on cpu_has_mips_r6 and Config4.MMUExtDef.
+ *
+ * Returns: The resulting guest VTLB size.
+ */
+static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size)
+{
+ unsigned int config4 = 0, ret = 0, limit;
+
+ /* Write MMUSize - 1 into guest Config registers */
+ if (cpu_guest_has_conf1)
+ change_gc0_config1(MIPS_CONF1_TLBS,
+ (size - 1) << MIPS_CONF1_TLBS_SHIFT);
+ if (cpu_guest_has_conf4) {
+ config4 = read_gc0_config4();
+ if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) {
+ config4 &= ~MIPS_CONF4_VTLBSIZEEXT;
+ config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) <<
+ MIPS_CONF4_VTLBSIZEEXT_SHIFT;
+ } else if ((config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) {
+ config4 &= ~MIPS_CONF4_MMUSIZEEXT;
+ config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) <<
+ MIPS_CONF4_MMUSIZEEXT_SHIFT;
+ }
+ write_gc0_config4(config4);
+ }
+
+ /*
+ * Set Guest.Wired.Limit = 0 (no limit up to Guest.MMUSize-1), unless it
+ * would exceed Root.Wired.Limit (clearing Guest.Wired.Wired so write
+ * not dropped)
+ */
+ if (cpu_has_mips_r6) {
+ limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >>
+ MIPSR6_WIRED_LIMIT_SHIFT;
+ if (size - 1 <= limit)
+ limit = 0;
+ write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT);
+ }
+
+ /* Read back MMUSize - 1 */
+ back_to_back_c0_hazard();
+ if (cpu_guest_has_conf1)
+ ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >>
+ MIPS_CONF1_TLBS_SHIFT;
+ /*
+ * NOTE(review): only Config1 is re-read from hardware; the extension
+ * bits below come from the local config4 value that was written above
+ * (still zero if Config4 is not present to the guest).
+ */
+ if (config4) {
+ if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT)
+ ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >>
+ MIPS_CONF4_VTLBSIZEEXT_SHIFT) <<
+ MIPS_CONF1_TLBS_SIZE;
+ else if ((config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT)
+ ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >>
+ MIPS_CONF4_MMUSIZEEXT_SHIFT) <<
+ MIPS_CONF1_TLBS_SIZE;
+ }
+ /* ret holds MMUSize - 1, so convert back to an entry count */
+ return ret + 1;
+}
+
+/**
+ * kvm_vz_hardware_enable() - Set up VZ guest support on the current CPU.
+ *
+ * Sizes the guest TLB in a CPU-type specific way and flushes it, programs
+ * GuestCtl0 (plus GuestCtl0Ext.CGI when cpu_has_guestctl0ext), resets the
+ * GuestID bookkeeping when cpu_has_guestid, and clears the injected guest
+ * interrupt bits in GuestCtl2 when cpu_has_guestctl2.
+ *
+ * Returns: 0 on success, -EINVAL if the guest VTLB size obtained on this
+ * CPU differs from the value already recorded in
+ * kvm_vz_guest_vtlb_size.
+ */
+static int kvm_vz_hardware_enable(void)
+{
+ unsigned int mmu_size, guest_mmu_size, ftlb_size;
+ u64 guest_cvmctl, cvmvmconfig;
+
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Set up guest timer/perfcount IRQ lines */
+ guest_cvmctl = read_gc0_cvmctl();
+ guest_cvmctl &= ~CVMCTL_IPTI;
+ guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT;
+ guest_cvmctl &= ~CVMCTL_IPPCI;
+ guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT;
+ write_gc0_cvmctl(guest_cvmctl);
+
+ cvmvmconfig = read_c0_cvmvmconfig();
+ /* No I/O hole translation. */
+ cvmvmconfig |= CVMVMCONF_DGHT;
+ /* Halve the root MMU size */
+ mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1)
+ >> CVMVMCONF_MMUSIZEM1_S) + 1;
+ /* guest gets the rounded-down half, root keeps the remainder */
+ guest_mmu_size = mmu_size / 2;
+ mmu_size -= guest_mmu_size;
+ cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1;
+ cvmvmconfig |= mmu_size - 1;
+ write_c0_cvmvmconfig(cvmvmconfig);
+
+ /* Update our records */
+ current_cpu_data.tlbsize = mmu_size;
+ current_cpu_data.tlbsizevtlb = mmu_size;
+ current_cpu_data.guest.tlbsize = guest_mmu_size;
+
+ /* Flush moved entries in new (guest) context */
+ kvm_vz_local_flush_guesttlb_all();
+ break;
+ default:
+ /*
+ * ImgTec cores tend to use a shared root/guest TLB. To avoid
+ * overlap of root wired and guest entries, the guest TLB may
+ * need resizing.
+ */
+ mmu_size = current_cpu_data.tlbsizevtlb;
+ ftlb_size = current_cpu_data.tlbsize - mmu_size;
+
+ /* Try switching to maximum guest VTLB size for flush */
+ guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size);
+ current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size;
+ kvm_vz_local_flush_guesttlb_all();
+
+ /*
+ * Reduce to make space for root wired entries and at least 2
+ * root non-wired entries. This does assume that long-term wired
+ * entries won't be added later.
+ */
+ guest_mmu_size = mmu_size - num_wired_entries() - 2;
+ guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size);
+ current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size;
+
+ /*
+ * Write the VTLB size, but if another CPU has already written,
+ * check it matches or we won't provide a consistent view to the
+ * guest. If this ever happens it suggests an asymmetric number
+ * of wired entries.
+ */
+ if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) &&
+ WARN(guest_mmu_size != kvm_vz_guest_vtlb_size,
+ "Available guest VTLB size mismatch"))
+ return -EINVAL;
+ break;
+ }
+
+ /*
+ * Enable virtualization features granting guest direct control of
+ * certain features:
+ * CP0=1: Guest coprocessor 0 context.
+ * AT=Guest: Guest MMU.
+ * CG=1: Hit (virtual address) CACHE operations (optional).
+ * CF=1: Guest Config registers.
+ * CGI=1: Indexed flush CACHE operations (optional).
+ */
+ write_c0_guestctl0(MIPS_GCTL0_CP0 |
+ (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) |
+ MIPS_GCTL0_CG | MIPS_GCTL0_CF);
+ if (cpu_has_guestctl0ext)
+ set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI);
+
+ if (cpu_has_guestid) {
+ write_c0_guestctl1(0);
+ kvm_vz_local_flush_roottlb_all_guests();
+
+ /* derive the GuestID version layout from this CPU's guestid_mask */
+ GUESTID_MASK = current_cpu_data.guestid_mask;
+ GUESTID_FIRST_VERSION = GUESTID_MASK + 1;
+ GUESTID_VERSION_MASK = ~GUESTID_MASK;
+
+ current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION;
+ }
+
+ /* clear any pending injected virtual guest interrupts */
+ if (cpu_has_guestctl2)
+ clear_c0_guestctl2(0x3f << 10);
+
+ return 0;
+}
+
+/**
+ * kvm_vz_hardware_disable() - Tear down VZ guest support on the current CPU.
+ *
+ * Flushes the guest TLB, on Octeon III hands the whole TLB back to root and
+ * flushes it again from the root side, and when cpu_has_guestid clears
+ * GuestCtl1 and flushes root TLB entries belonging to guests.
+ */
+static void kvm_vz_hardware_disable(void)
+{
+ u64 cvmvmconfig;
+ unsigned int mmu_size;
+
+ /* Flush any remaining guest TLB entries */
+ kvm_vz_local_flush_guesttlb_all();
+
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /*
+ * Allocate whole TLB for root. Existing guest TLB entries will
+ * change ownership to the root TLB. We should be safe though as
+ * they've already been flushed above while in guest TLB.
+ */
+ cvmvmconfig = read_c0_cvmvmconfig();
+ mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1)
+ >> CVMVMCONF_MMUSIZEM1_S) + 1;
+ /* root MMU size field is set to the full MMU size read above */
+ cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1;
+ cvmvmconfig |= mmu_size - 1;
+ write_c0_cvmvmconfig(cvmvmconfig);
+
+ /* Update our records */
+ current_cpu_data.tlbsize = mmu_size;
+ current_cpu_data.tlbsizevtlb = mmu_size;
+ current_cpu_data.guest.tlbsize = 0;
+
+ /* Flush moved entries in new (root) context */
+ local_flush_tlb_all();
+ break;
+ }
+
+ if (cpu_has_guestid) {
+ write_c0_guestctl1(0);
+ kvm_vz_local_flush_roottlb_all_guests();
+ }
+}
+
+/*
+ * Report the value of a VZ-specific KVM capability.
+ * Returns 1 for KVM_CAP_MIPS_VZ, 2 for KVM_CAP_MIPS_64BIT (CONFIG_64BIT
+ * builds only) and 0 for every other extension. @kvm is not used.
+ */
+static int kvm_vz_check_extension(struct kvm *kvm, long ext)
+{
+	switch (ext) {
+	case KVM_CAP_MIPS_VZ:
+		/* reaching this code at all implies cpu_has_vz */
+		return 1;
+#ifdef CONFIG_64BIT
+	case KVM_CAP_MIPS_64BIT:
+		/* 64-bit registers/operations and addresses are supported */
+		return 2;
+#endif
+	default:
+		return 0;
+	}
+}
+
+/* Zero the vzguestid[] slot of every possible CPU for a new VCPU; returns 0. */
+static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		vcpu->arch.vzguestid[cpu] = 0;
+	}
+
+	return 0;
+}
+
+/* Drop every per-CPU reference to @vcpu held in last_vcpu[] / last_exec_vcpu[]. */
+static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	/*
+	 * The VCPU structure may be freed and later reused for a different
+	 * VCPU; a stale matching pointer left behind in either array would
+	 * then be mistaken for the new VCPU.
+	 */
+	for_each_possible_cpu(i) {
+		if (last_vcpu[i] == vcpu)
+			last_vcpu[i] = NULL;
+
+		if (last_exec_vcpu[i] == vcpu)
+			last_exec_vcpu[i] = NULL;
+	}
+}
+
+/**
+ * kvm_vz_vcpu_setup() - Initialise the software copy of guest CP0 state.
+ * @vcpu: VCPU being set up.
+ *
+ * Initialises the guest count frequency, then fills vcpu->arch.cop0 with
+ * reset values for the guest CP0 registers. Each Config register is seeded
+ * via the matching kvm_save_gc0_config*() helper and then adjusted: the M bit
+ * of the preceding Config register is set whenever the next one is present to
+ * the guest, and selected feature bits are cleared. Finally the guest PC is
+ * placed at the reset vector.
+ *
+ * Returns: 0 in all cases.
+ */
+static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */
+
+ /*
+ * Start off the timer at the same frequency as the host timer, but the
+ * soft timer doesn't handle frequencies greater than 1GHz yet.
+ */
+ if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC)
+ count_hz = mips_hpt_frequency;
+ kvm_mips_init_count(vcpu, count_hz);
+
+ /*
+ * Initialize guest register state to valid architectural reset state.
+ */
+
+ /* PageGrain */
+ if (cpu_has_mips_r6)
+ kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC);
+ /* Wired */
+ if (cpu_has_mips_r6)
+ kvm_write_sw_gc0_wired(cop0,
+ read_gc0_wired() & MIPSR6_WIRED_LIMIT);
+ /* Status */
+ kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL);
+ if (cpu_has_mips_r6)
+ kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status());
+ /* IntCtl */
+ kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() &
+ (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI));
+ /* PRId */
+ kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id);
+ /* EBase */
+ kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id);
+ /* Config */
+ kvm_save_gc0_config(cop0);
+ /* architecturally writable (e.g. from guest) */
+ kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK,
+ _page_cachable_default >> _CACHE_SHIFT);
+ /* architecturally read only, but maybe writable from root */
+ kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config());
+ if (cpu_guest_has_conf1) {
+ /* mark Config1 as present in the software copy of Config */
+ kvm_set_sw_gc0_config(cop0, MIPS_CONF_M);
+ /* Config1 */
+ kvm_save_gc0_config1(cop0);
+ /* architecturally read only, but maybe writable from root */
+ kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 |
+ MIPS_CONF1_MD |
+ MIPS_CONF1_PC |
+ MIPS_CONF1_WR |
+ MIPS_CONF1_CA |
+ MIPS_CONF1_FP);
+ }
+ if (cpu_guest_has_conf2) {
+ kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M);
+ /* Config2 */
+ kvm_save_gc0_config2(cop0);
+ }
+ if (cpu_guest_has_conf3) {
+ kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M);
+ /* Config3 */
+ kvm_save_gc0_config3(cop0);
+ /* architecturally writable (e.g. from guest) */
+ kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE);
+ /* architecturally read only, but maybe writable from root */
+ kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA |
+ MIPS_CONF3_BPG |
+ MIPS_CONF3_ULRI |
+ MIPS_CONF3_DSP |
+ MIPS_CONF3_CTXTC |
+ MIPS_CONF3_ITL |
+ MIPS_CONF3_LPA |
+ MIPS_CONF3_VEIC |
+ MIPS_CONF3_VINT |
+ MIPS_CONF3_SP |
+ MIPS_CONF3_CDMM |
+ MIPS_CONF3_MT |
+ MIPS_CONF3_SM |
+ MIPS_CONF3_TL);
+ }
+ if (cpu_guest_has_conf4) {
+ kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M);
+ /* Config4 */
+ kvm_save_gc0_config4(cop0);
+ }
+ if (cpu_guest_has_conf5) {
+ kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M);
+ /* Config5 */
+ kvm_save_gc0_config5(cop0);
+ /* architecturally writable (e.g. from guest) */
+ kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K |
+ MIPS_CONF5_CV |
+ MIPS_CONF5_MSAEN |
+ MIPS_CONF5_UFE |
+ MIPS_CONF5_FRE |
+ MIPS_CONF5_SBRI |
+ MIPS_CONF5_UFR);
+ /* architecturally read only, but maybe writable from root */
+ kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP);
+ }
+
+ if (cpu_guest_has_contextconfig) {
+ /* ContextConfig */
+ kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0);
+#ifdef CONFIG_64BIT
+ /* XContextConfig */
+ /* bits SEGBITS-13+3:4 set */
+ kvm_write_sw_gc0_xcontextconfig(cop0,
+ ((1ull << (cpu_vmbits - 13)) - 1) << 4);
+#endif
+ }
+
+ /* Implementation dependent, use the legacy layout */
+ if (cpu_guest_has_segments) {
+ /* SegCtl0, SegCtl1, SegCtl2 */
+ kvm_write_sw_gc0_segctl0(cop0, 0x00200010);
+ kvm_write_sw_gc0_segctl1(cop0, 0x00000002 |
+ (_page_cachable_default >> _CACHE_SHIFT) <<
+ (16 + MIPS_SEGCFG_C_SHIFT));
+ kvm_write_sw_gc0_segctl2(cop0, 0x00380438);
+ }
+
+ /* reset HTW registers */
+ if (cpu_guest_has_htw && cpu_has_mips_r6) {
+ /* PWField */
+ kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302);
+ /* PWSize */
+ kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT);
+ }
+
+ /* start with no pending virtual guest interrupts */
+ if (cpu_has_guestctl2)
+ cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0;
+
+ /* Put PC at reset vector */
+ vcpu->arch.pc = CKSEG1ADDR(0x1fc00000);
+
+ return 0;
+}
+
+/* Invalidate all root TLB mappings of guest memory belonging to @kvm. */
+static void kvm_vz_flush_shadow_all(struct kvm *kvm)
+{
+	/*
+	 * Without GuestID, each CPU has a single GPA ASID shared by all VCPUs
+	 * of the VM, so having VCPUs invalidate those ASIDs individually makes
+	 * no sense. Flag every CPU in asid_flush_mask instead; the remote
+	 * flush below then only serves to kick running VCPUs so that they
+	 * check asid_flush_mask.
+	 */
+	if (!cpu_has_guestid)
+		cpumask_setall(&kvm->arch.asid_flush_mask);
+
+	/* With GuestID this flushes the GuestID of each VCPU individually */
+	kvm_flush_remote_tlbs(kvm);
+}
+
+/*
+ * Flush mappings for a single memslot. @slot is not examined: this simply
+ * performs the same full flush as kvm_vz_flush_shadow_all().
+ */
+static void kvm_vz_flush_shadow_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *slot)
+{
+ kvm_vz_flush_shadow_all(kvm);
+}
+
+/*
+ * Process pending requests and reload guest TLB state on the current CPU.
+ * When kvm_vz_check_requests() returns non-zero, the wired guest TLB entries
+ * are saved before and reloaded after kvm_vz_vcpu_load_tlb(). @run is unused.
+ */
+static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	int this_cpu = smp_processor_id();
+	int keep_wired = kvm_vz_check_requests(vcpu, this_cpu);
+
+	if (!keep_wired) {
+		kvm_vz_vcpu_load_tlb(vcpu, this_cpu);
+		return;
+	}
+
+	kvm_vz_vcpu_save_wired(vcpu);
+	kvm_vz_vcpu_load_tlb(vcpu, this_cpu);
+	kvm_vz_vcpu_load_wired(vcpu);
+}
+
+/*
+ * Prepare the current CPU and enter the guest through vcpu->arch.vcpu_run().
+ * Returns whatever vcpu->arch.vcpu_run() returned.
+ */
+static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	int this_cpu = smp_processor_id();
+	int ret;
+
+	kvm_vz_acquire_htimer(vcpu);
+
+	/* Deliver any exceptions/interrupts that are already pending */
+	kvm_mips_deliver_interrupts(vcpu, read_gc0_cause());
+
+	/* Handle requests, then bring guest TLB and wired entries up to date */
+	kvm_vz_check_requests(vcpu, this_cpu);
+	kvm_vz_vcpu_load_tlb(vcpu, this_cpu);
+	kvm_vz_vcpu_load_wired(vcpu);
+
+	ret = vcpu->arch.vcpu_run(run, vcpu);
+
+	/* Wired entries are saved again once the guest has been left */
+	kvm_vz_vcpu_save_wired(vcpu);
+
+	return ret;
+}
+
+/*
+ * Callback table for the VZ implementation; handed out to the caller of
+ * kvm_mips_emulation_init().
+ */
+static struct kvm_mips_callbacks kvm_vz_callbacks = {
+ /* guest exit / exception handlers */
+ .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable,
+ /* TLB modified shares its handler with TLB store miss */
+ .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss,
+ .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss,
+ .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss,
+ .handle_addr_err_st = kvm_trap_vz_no_handler,
+ .handle_addr_err_ld = kvm_trap_vz_no_handler,
+ .handle_syscall = kvm_trap_vz_no_handler,
+ .handle_res_inst = kvm_trap_vz_no_handler,
+ .handle_break = kvm_trap_vz_no_handler,
+ .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled,
+ .handle_guest_exit = kvm_trap_vz_handle_guest_exit,
+
+ /* hardware, VM and VCPU management */
+ .hardware_enable = kvm_vz_hardware_enable,
+ .hardware_disable = kvm_vz_hardware_disable,
+ .check_extension = kvm_vz_check_extension,
+ .vcpu_init = kvm_vz_vcpu_init,
+ .vcpu_uninit = kvm_vz_vcpu_uninit,
+ .vcpu_setup = kvm_vz_vcpu_setup,
+ .flush_shadow_all = kvm_vz_flush_shadow_all,
+ .flush_shadow_memslot = kvm_vz_flush_shadow_memslot,
+ .gva_to_gpa = kvm_vz_gva_to_gpa_cb,
+ .queue_timer_int = kvm_vz_queue_timer_int_cb,
+ .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb,
+ .queue_io_int = kvm_vz_queue_io_int_cb,
+ .dequeue_io_int = kvm_vz_dequeue_io_int_cb,
+ .irq_deliver = kvm_vz_irq_deliver_cb,
+ .irq_clear = kvm_vz_irq_clear_cb,
+ .num_regs = kvm_vz_num_regs,
+ .copy_reg_indices = kvm_vz_copy_reg_indices,
+ .get_one_reg = kvm_vz_get_one_reg,
+ .set_one_reg = kvm_vz_set_one_reg,
+ .vcpu_load = kvm_vz_vcpu_load,
+ .vcpu_put = kvm_vz_vcpu_put,
+ .vcpu_run = kvm_vz_vcpu_run,
+ .vcpu_reenter = kvm_vz_vcpu_reenter,
+};
+
+/*
+ * Install the VZ callback table into *@install_callbacks.
+ * Returns 0 on success, or -ENODEV (leaving *@install_callbacks untouched)
+ * when the CPU lacks VZ or pgd_reg was never allocated.
+ */
+int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
+{
+	int err = -ENODEV;
+
+	/*
+	 * VZ requires at least 2 KScratch registers, so allocating pgd_reg
+	 * should have been possible; warn if it was not. The WARN is only
+	 * evaluated once cpu_has_vz is known to hold.
+	 */
+	if (cpu_has_vz &&
+	    !WARN(pgd_reg == -1,
+		  "pgd_reg not allocated even though cpu_has_vz\n")) {
+		pr_info("Starting KVM with MIPS VZ extensions\n");
+
+		*install_callbacks = &kvm_vz_callbacks;
+		err = 0;
+	}
+
+	return err;
+}
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 6db341347202..899e46279902 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -24,6 +24,7 @@
/* Cache operations. */
void (*flush_cache_all)(void);
void (*__flush_cache_all)(void);
+EXPORT_SYMBOL_GPL(__flush_cache_all);
void (*flush_cache_mm)(struct mm_struct *mm);
void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index aa75849c36bc..3ca20283b31e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -348,7 +348,7 @@ void maar_init(void)
upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
pr_info(" [%d]: ", i / 2);
- if (!(attr & MIPS_MAAR_V)) {
+ if (!(attr & MIPS_MAAR_VL)) {
pr_cont("disabled\n");
continue;
}
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index 5fcb9ac72693..f0a5d8b844d6 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
return val;
}
-#define xchg(ptr, with) \
- ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, with) \
+ ({ \
+ (__typeof__(*(ptr))) __xchg((unsigned long)(with), \
+ (ptr), \
+ sizeof(*(ptr))); \
+ })
#endif /* __ASM_OPENRISC_CMPXCHG_H */
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 140faa16685a..1311e6b13991 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -211,7 +211,7 @@ do { \
case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \
case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \
- case 8: __get_user_asm2(x, ptr, retval); \
+ case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
} while (0)
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 5c4695d13542..ee3e604959e1 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -30,6 +30,7 @@
#include <asm/hardirq.h>
#include <asm/delay.h>
#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
#define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
@@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3);
DECLARE_EXPORT(__ashrdi3);
DECLARE_EXPORT(__ashldi3);
DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__ucmpdi2);
+EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 828a29110459..f8da545854f9 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -90,6 +90,7 @@ void arch_cpu_idle(void)
}
void (*pm_power_off) (void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
/*
* When a process does an "exec", machine state like FPU and debug
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 19c9c3c5f267..c7e15cc5c668 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page)
#define flush_kernel_dcache_range(start,size) \
flush_kernel_dcache_range_asm((start), (start)+(size));
-/* vmap range flushes and invalidates. Architecturally, we don't need
- * the invalidate, because the CPU should refuse to speculate once an
- * area has been flushed, so invalidate is left empty */
-static inline void flush_kernel_vmap_range(void *vaddr, int size)
-{
- unsigned long start = (unsigned long)vaddr;
-
- flush_kernel_dcache_range_asm(start, start + size);
-}
-static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
-{
- unsigned long start = (unsigned long)vaddr;
- void *cursor = vaddr;
- for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) {
- struct page *page = vmalloc_to_page(cursor);
-
- if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
- flush_kernel_dcache_page(page);
- }
- flush_kernel_dcache_range_asm(start, start + size);
-}
+void flush_kernel_vmap_range(void *vaddr, int size);
+void invalidate_kernel_vmap_range(void *vaddr, int size);
#define flush_cache_vmap(start, end) flush_cache_all()
#define flush_cache_vunmap(start, end) flush_cache_all()
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index fb4382c28259..edfbf9d6a6dd 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -32,7 +32,8 @@
* that put_user is the same as __put_user, etc.
*/
-#define access_ok(type, uaddr, size) (1)
+#define access_ok(type, uaddr, size) \
+ ( (uaddr) == (uaddr) )
#define put_user __put_user
#define get_user __get_user
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 6b0741e7a7ed..667c99421003 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -362,8 +362,9 @@
#define __NR_copy_file_range (__NR_Linux + 346)
#define __NR_preadv2 (__NR_Linux + 347)
#define __NR_pwritev2 (__NR_Linux + 348)
+#define __NR_statx (__NR_Linux + 349)
-#define __NR_Linux_syscalls (__NR_pwritev2 + 1)
+#define __NR_Linux_syscalls (__NR_statx + 1)
#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 0dc72d5de861..c32a09095216 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
}
}
+
+/*
+ * Flush a vmap'd kernel range of @size bytes starting at @vaddr. Ranges
+ * larger than parisc_cache_flush_threshold go through flush_data_cache();
+ * anything else is flushed as the range [vaddr, vaddr + size).
+ */
+void flush_kernel_vmap_range(void *vaddr, int size)
+{
+	unsigned long begin = (unsigned long)vaddr;
+
+	if ((unsigned long)size <= parisc_cache_flush_threshold) {
+		flush_kernel_dcache_range_asm(begin, begin + size);
+		return;
+	}
+
+	flush_data_cache();
+}
+EXPORT_SYMBOL(flush_kernel_vmap_range);
+
+/*
+ * Invalidate a vmap'd kernel range of @size bytes starting at @vaddr. Ranges
+ * larger than parisc_cache_flush_threshold go through flush_data_cache();
+ * anything else is flushed as the range [vaddr, vaddr + size).
+ */
+void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+	unsigned long begin = (unsigned long)vaddr;
+
+	if ((unsigned long)size <= parisc_cache_flush_threshold) {
+		flush_kernel_dcache_range_asm(begin, begin + size);
+		return;
+	}
+
+	flush_data_cache();
+}
+EXPORT_SYMBOL(invalidate_kernel_vmap_range);
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index a0ecdb4abcc8..c66c943d9322 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
*/
*loc = fsel(val, addend);
break;
+ case R_PARISC_SECREL32:
+ /* 32-bit section relative address. */
+ *loc = fsel(val, addend);
+ break;
case R_PARISC_DPREL21L:
/* left 21 bit of relative address */
val = lrsel(val - dp, addend);
@@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
*/
*loc = fsel(val, addend);
break;
+ case R_PARISC_SECREL32:
+ /* 32-bit section relative address. */
+ *loc = fsel(val, addend);
+ break;
case R_PARISC_FPTR64:
/* 64-bit function address */
if(in_local(me, (void *)(val + addend))) {
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index e282a5131d77..6017a5af2e6e 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -39,7 +39,7 @@
* the PDC INTRIGUE calls. This is done to eliminate bugs introduced
* in various PDC revisions. The code is much more maintainable
* and reliable this way vs having to debug on every version of PDC
- * on every box.
+ * on every box.
*/
#include <linux/capability.h>
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
static int perf_release(struct inode *inode, struct file *file);
static int perf_open(struct inode *inode, struct file *file);
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
- loff_t *ppos);
+static ssize_t perf_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
static void perf_start_counters(void);
static int perf_stop_counters(uint32_t *raddr);
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
/*
* configure:
*
- * Configure the cpu with a given data image. First turn off the counters,
+ * Configure the cpu with a given data image. First turn off the counters,
* then download the image, then turn the counters back on.
*/
static int perf_config(uint32_t *image_ptr)
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
error = perf_stop_counters(raddr);
if (error != 0) {
printk("perf_config: perf_stop_counters = %ld\n", error);
- return -EINVAL;
+ return -EINVAL;
}
printk("Preparing to write image\n");
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
error = perf_write_image((uint64_t *)image_ptr);
if (error != 0) {
printk("perf_config: DOWNLOAD = %ld\n", error);
- return -EINVAL;
+ return -EINVAL;
}
printk("Preparing to start counters\n");
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
}
/*
- * Open the device and initialize all of its memory. The device is only
+ * Open the device and initialize all of its memory. The device is only
* opened once, but can be "queried" by multiple processes that know its
* file descriptor.
*/
@@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
* called on the processor that the download should happen
* on.
*/
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
- loff_t *ppos)
+static ssize_t perf_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
{
size_t image_size;
uint32_t image_type;
uint32_t interface_type;
uint32_t test;
- if (perf_processor_interface == ONYX_INTF)
+ if (perf_processor_interface == ONYX_INTF)
image_size = PCXU_IMAGE_SIZE;
- else if (perf_processor_interface == CUDA_INTF)
+ else if (perf_processor_interface == CUDA_INTF)
image_size = PCXW_IMAGE_SIZE;
- else
+ else
return -EFAULT;
if (!capable(CAP_SYS_ADMIN))
@@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
/* First check the machine type is correct for
the requested image */
- if (((perf_processor_interface == CUDA_INTF) &&
- (interface_type != CUDA_INTF)) ||
- ((perf_processor_interface == ONYX_INTF) &&
- (interface_type != ONYX_INTF)))
+ if (((perf_processor_interface == CUDA_INTF) &&
+ (interface_type != CUDA_INTF)) ||
+ ((perf_processor_interface == ONYX_INTF) &&
+ (interface_type != ONYX_INTF)))
return -EINVAL;
/* Next check to make sure the requested image
is valid */
- if (((interface_type == CUDA_INTF) &&
+ if (((interface_type == CUDA_INTF) &&
(test >= MAX_CUDA_IMAGES)) ||
- ((interface_type == ONYX_INTF) &&
- (test >= MAX_ONYX_IMAGES)))
+ ((interface_type == ONYX_INTF) &&
+ (test >= MAX_ONYX_IMAGES)))
return -EINVAL;
/* Copy the image into the processor */
- if (interface_type == CUDA_INTF)
+ if (interface_type == CUDA_INTF)
return perf_config(cuda_images[test]);
else
return perf_config(onyx_images[test]);
@@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
static void perf_patch_images(void)
{
#if 0 /* FIXME!! */
-/*
+/*
* NOTE: this routine is VERY specific to the current TLB image.
* If the image is changed, this routine might also need to be changed.
*/
@@ -367,9 +367,9 @@ static void perf_patch_images(void)
extern void $i_dtlb_miss_2_0();
extern void PA2_0_iva();
- /*
+ /*
* We can only use the lower 32-bits, the upper 32-bits should be 0
- * anyway given this is in the kernel
+ * anyway given this is in the kernel
*/
uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0);
uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0);
@@ -377,21 +377,21 @@ static void perf_patch_images(void)
if (perf_processor_interface == ONYX_INTF) {
/* clear last 2 bytes */
- onyx_images[TLBMISS][15] &= 0xffffff00;
+ onyx_images[TLBMISS][15] &= 0xffffff00;
/* set 2 bytes */
onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
onyx_images[TLBMISS][17] = itlb_addr;
/* clear last 2 bytes */
- onyx_images[TLBHANDMISS][15] &= 0xffffff00;
+ onyx_images[TLBHANDMISS][15] &= 0xffffff00;
/* set 2 bytes */
onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
onyx_images[TLBHANDMISS][17] = itlb_addr;
/* clear last 2 bytes */
- onyx_images[BIG_CPI][15] &= 0xffffff00;
+ onyx_images[BIG_CPI][15] &= 0xffffff00;
/* set 2 bytes */
onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
@@ -404,24 +404,24 @@ static void perf_patch_images(void)
} else if (perf_processor_interface == CUDA_INTF) {
/* Cuda interface */
- cuda_images[TLBMISS][16] =
+ cuda_images[TLBMISS][16] =
(cuda_images[TLBMISS][16]&0xffff0000) |
((dtlb_addr >> 8)&0x0000ffff);
- cuda_images[TLBMISS][17] =
+ cuda_images[TLBMISS][17] =
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
- cuda_images[TLBHANDMISS][16] =
+ cuda_images[TLBHANDMISS][16] =
(cuda_images[TLBHANDMISS][16]&0xffff0000) |
((dtlb_addr >> 8)&0x0000ffff);
- cuda_images[TLBHANDMISS][17] =
+ cuda_images[TLBHANDMISS][17] =
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
- cuda_images[BIG_CPI][16] =
+ cuda_images[BIG_CPI][16] =
(cuda_images[BIG_CPI][16]&0xffff0000) |
((dtlb_addr >> 8)&0x0000ffff);
- cuda_images[BIG_CPI][17] =
+ cuda_images[BIG_CPI][17] =
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
} else {
@@ -433,7 +433,7 @@ static void perf_patch_images(void)
/*
* ioctl routine
- * All routines effect the processor that they are executed on. Thus you
+ * All routines affect the processor that they are executed on. Thus you
* must be running on the processor that you wish to change.
*/
@@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
/* copy out the Counters */
- if (copy_to_user((void __user *)arg, raddr,
+ if (copy_to_user((void __user *)arg, raddr,
sizeof (raddr)) != 0) {
error = -EFAULT;
break;
@@ -487,7 +487,7 @@ static const struct file_operations perf_fops = {
.open = perf_open,
.release = perf_release
};
-
+
static struct miscdevice perf_dev = {
MISC_DYNAMIC_MINOR,
PA_PERF_DEV,
@@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr)
/* OR sticky2 (bit 1496) to counter2 bit 32 */
tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
raddr[2] = (uint32_t)tmp64;
-
+
/* Counter3 is bits 1497 to 1528 */
tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
/* OR sticky3 (bit 1529) to counter3 bit 32 */
@@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr)
userbuf[22] = 0;
userbuf[23] = 0;
- /*
+ /*
* Write back the zeroed bytes + the image given
* the read was destructive.
*/
@@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr)
} else {
/*
- * Read RDR-15 which contains the counters and sticky bits
+ * Read RDR-15 which contains the counters and sticky bits
*/
if (!perf_rdr_read_ubuf(15, userbuf)) {
return -13;
}
- /*
+ /*
* Clear out the counters
*/
perf_rdr_clear(15);
@@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr)
raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
}
-
+
return 0;
}
@@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer)
i = tentry->num_words;
while (i--) {
buffer[i] = 0;
- }
+ }
/* Check for bits an even number of 64 */
if ((xbits = width & 0x03f) != 0) {
@@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr)
}
runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+ if (!runway) {
+ pr_err("perf_write_image: ioremap failed!\n");
+ return -ENOMEM;
+ }
/* Merge intrigue bits into Runway STATUS 0 */
tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
- __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
+ __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
runway + RUNWAY_STATUS);
-
+
/* Write RUNWAY DEBUG registers */
for (i = 0; i < 8; i++) {
__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
}
- return 0;
+ return 0;
}
/*
@@ -843,7 +847,7 @@ printk("perf_rdr_write\n");
perf_rdr_shift_out_U(rdr_num, buffer[i]);
} else {
perf_rdr_shift_out_W(rdr_num, buffer[i]);
- }
+ }
}
printk("perf_rdr_write done\n");
}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 06f7ca7fe70b..b76f503eee4a 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -142,6 +142,8 @@ void machine_power_off(void)
printk(KERN_EMERG "System shut down completed.\n"
"Please power this system off now.");
+
+ for (;;);
}
void (*pm_power_off)(void) = machine_power_off;
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 3cfef1de8061..44aeaa9c039f 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -444,6 +444,7 @@
ENTRY_SAME(copy_file_range)
ENTRY_COMP(preadv2)
ENTRY_COMP(pwritev2)
+ ENTRY_SAME(statx)
.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 861e72109df2..f080abfc2f83 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -68,6 +68,7 @@ SECTIONS
}
#ifdef CONFIG_PPC64_BOOT_WRAPPER
+ . = ALIGN(256);
.got :
{
__toc_start = .;
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index 9fa046d56eba..411994551afc 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
- *key = 0;
+ *key = ~0;
return 0;
}
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 73eb794d6163..bc5fdfd22788 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -51,6 +51,10 @@
#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit))
#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \
+ ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
#include <asm/barrier.h>
/* Macro for generating the ***_bits() functions */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7bba8f415627..01d05c76f1c7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -45,9 +45,6 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
-#ifdef CONFIG_KVM_MMIO
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#endif
#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
/* These values are internal and can be increased later */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index f97d8cb6bdf6..ed62efe01e49 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,6 +66,55 @@
#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
+/*
+ * Machine Check bits on power9
+ */
+#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1)
+
+#define P9_SRR1_MC_IFETCH(srr1) ( \
+ PPC_BITEXTRACT(srr1, 45, 0) | \
+ PPC_BITEXTRACT(srr1, 44, 1) | \
+ PPC_BITEXTRACT(srr1, 43, 2) | \
+ PPC_BITEXTRACT(srr1, 36, 3) )
+
+/* 0 is reserved */
+#define P9_SRR1_MC_IFETCH_UE 1
+#define P9_SRR1_MC_IFETCH_SLB_PARITY 2
+#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3
+#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4
+#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5
+#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6
+/* 7 is reserved */
+#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8
+#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9
+/* 10 ? */
+#define P9_SRR1_MC_IFETCH_RA 11
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12
+#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13
+#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15
+
+/* DSISR bits for machine check (On Power9) */
+#define P9_DSISR_MC_UE (PPC_BIT(48))
+#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49))
+#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50))
+#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51))
+#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52))
+#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53))
+#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54))
+#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55))
+#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56))
+#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57))
+#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58))
+#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59))
+#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60))
+
+/* SLB error bits */
+#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \
+ P9_DSISR_MC_SLB_PARITY_MFSLB | \
+ P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
+
enum MCE_Version {
MCE_V1 = 1,
};
@@ -93,6 +142,9 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_SLB = 2,
MCE_ERROR_TYPE_ERAT = 3,
MCE_ERROR_TYPE_TLB = 4,
+ MCE_ERROR_TYPE_USER = 5,
+ MCE_ERROR_TYPE_RA = 6,
+ MCE_ERROR_TYPE_LINK = 7,
};
enum MCE_UeErrorType {
@@ -121,6 +173,32 @@ enum MCE_TlbErrorType {
MCE_TLB_ERROR_MULTIHIT = 2,
};
+enum MCE_UserErrorType {
+ MCE_USER_ERROR_INDETERMINATE = 0,
+ MCE_USER_ERROR_TLBIE = 1,
+};
+
+enum MCE_RaErrorType {
+ MCE_RA_ERROR_INDETERMINATE = 0,
+ MCE_RA_ERROR_IFETCH = 1,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
+ MCE_RA_ERROR_LOAD = 4,
+ MCE_RA_ERROR_STORE = 5,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+};
+
+enum MCE_LinkErrorType {
+ MCE_LINK_ERROR_INDETERMINATE = 0,
+ MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+ MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+ MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+ MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+ MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
struct machine_check_event {
enum MCE_Version version:8; /* 0x00 */
uint8_t in_use; /* 0x01 */
@@ -166,6 +244,30 @@ struct machine_check_event {
uint64_t effective_address;
uint8_t reserved_2[16];
} tlb_error;
+
+ struct {
+ enum MCE_UserErrorType user_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } user_error;
+
+ struct {
+ enum MCE_RaErrorType ra_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } ra_error;
+
+ struct {
+ enum MCE_LinkErrorType link_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } link_error;
} u;
};
@@ -176,8 +278,12 @@ struct mce_error_info {
enum MCE_SlbErrorType slb_error_type:8;
enum MCE_EratErrorType erat_error_type:8;
enum MCE_TlbErrorType tlb_error_type:8;
+ enum MCE_UserErrorType user_error_type:8;
+ enum MCE_RaErrorType ra_error_type:8;
+ enum MCE_LinkErrorType link_error_type:8;
} u;
- uint8_t reserved[2];
+ enum MCE_Severity severity:8;
+ enum MCE_Initiator initiator:8;
};
#define MAX_MC_EVT 100
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 4b369d83fe9c..1c9470881c4a 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -387,3 +387,4 @@ SYSCALL(copy_file_range)
COMPAT_SYS_SPU(preadv2)
COMPAT_SYS_SPU(pwritev2)
SYSCALL(kexec_file_load)
+SYSCALL(statx)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index eb1acee91a20..9ba11dbcaca9 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 383
+#define NR_syscalls 384
#define __NR__exit __NR_exit
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 4edbe4bb0e8b..07fbeb927834 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -29,6 +29,9 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_GUEST_DEBUG
+/* Not always available, but if it is, this is the correct offset. */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
struct kvm_regs {
__u64 pc;
__u64 cr;
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 2f26335a3c42..b85f14228857 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -393,5 +393,6 @@
#define __NR_preadv2 380
#define __NR_pwritev2 381
#define __NR_kexec_file_load 382
+#define __NR_statx 383
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bb7a1890aeb7..e79b9daa873c 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action);
extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
{ /* Power9 */
@@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
{ /* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index c6923ff45131..a1475e6aef3a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce,
case MCE_ERROR_TYPE_TLB:
mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
break;
+ case MCE_ERROR_TYPE_USER:
+ mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+ break;
case MCE_ERROR_TYPE_UNKNOWN:
default:
break;
@@ -90,13 +99,14 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
- mce->initiator = MCE_INITIATOR_CPU;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
- mce->severity = MCE_SEV_ERROR_SYNC;
+
+ mce->initiator = mce_err->initiator;
+ mce->severity = mce_err->severity;
/*
* Populate the mce error_type and type-specific error_type.
@@ -115,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled,
} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
mce->u.erat_error.effective_address_provided = true;
mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+ mce->u.user_error.effective_address_provided = true;
+ mce->u.user_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+ mce->u.ra_error.effective_address_provided = true;
+ mce->u.ra_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+ mce->u.link_error.effective_address_provided = true;
+ mce->u.link_error.effective_address = addr;
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
@@ -239,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt)
"Parity",
"Multihit",
};
+ static const char *mc_user_types[] = {
+ "Indeterminate",
+ "tlbie(l) invalid",
+ };
+ static const char *mc_ra_types[] = {
+ "Indeterminate",
+ "Instruction fetch (bad)",
+ "Page table walk ifetch (bad)",
+ "Page table walk ifetch (foreign)",
+ "Load (bad)",
+ "Store (bad)",
+ "Page table walk Load/Store (bad)",
+ "Page table walk Load/Store (foreign)",
+ "Load/Store (foreign)",
+ };
+ static const char *mc_link_types[] = {
+ "Indeterminate",
+ "Instruction fetch (timeout)",
+ "Page table walk ifetch (timeout)",
+ "Load (timeout)",
+ "Store (timeout)",
+ "Page table walk Load/Store (timeout)",
+ };
/* Print things out */
if (evt->version != MCE_V1) {
@@ -315,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt)
printk("%s Effective address: %016llx\n",
level, evt->u.tlb_error.effective_address);
break;
+ case MCE_ERROR_TYPE_USER:
+ subtype = evt->u.user_error.user_error_type <
+ ARRAY_SIZE(mc_user_types) ?
+ mc_user_types[evt->u.user_error.user_error_type]
+ : "Unknown";
+ printk("%s Error type: User [%s]\n", level, subtype);
+ if (evt->u.user_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.user_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_RA:
+ subtype = evt->u.ra_error.ra_error_type <
+ ARRAY_SIZE(mc_ra_types) ?
+ mc_ra_types[evt->u.ra_error.ra_error_type]
+ : "Unknown";
+ printk("%s Error type: Real address [%s]\n", level, subtype);
+ if (evt->u.ra_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.ra_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ subtype = evt->u.link_error.link_error_type <
+ ARRAY_SIZE(mc_link_types) ?
+ mc_link_types[evt->u.link_error.link_error_type]
+ : "Unknown";
+ printk("%s Error type: Link [%s]\n", level, subtype);
+ if (evt->u.link_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.link_error.effective_address);
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
printk("%s Error type: Unknown\n", level);
@@ -341,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
if (evt->u.tlb_error.effective_address_provided)
return evt->u.tlb_error.effective_address;
break;
+ case MCE_ERROR_TYPE_USER:
+ if (evt->u.user_error.effective_address_provided)
+ return evt->u.user_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ if (evt->u.ra_error.effective_address_provided)
+ return evt->u.ra_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ if (evt->u.link_error.effective_address_provided)
+ return evt->u.link_error.effective_address;
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
break;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 7353991c4ece..763d6f58caa8 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -116,6 +116,51 @@ static void flush_and_reload_slb(void)
}
#endif
+static void flush_erat(void)
+{
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (what == MCE_FLUSH_SLB) {
+ flush_and_reload_slb();
+ return 1;
+ }
+#endif
+ if (what == MCE_FLUSH_ERAT) {
+ flush_erat();
+ return 1;
+ }
+ if (what == MCE_FLUSH_TLB) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+ cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
+{
+ if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
+ dsisr &= ~slb;
+ if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
+ dsisr &= ~erat;
+ if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
+ dsisr &= ~tlb;
+ /* Any other errors we don't understand? */
+ if (dsisr)
+ return 0;
+ return 1;
+}
+
static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
long handled = 1;
@@ -281,6 +326,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
long handled = 1;
struct mce_error_info mce_error_info = { 0 };
+ mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+ mce_error_info.initiator = MCE_INITIATOR_CPU;
+
srr1 = regs->msr;
nip = regs->nip;
@@ -352,6 +400,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
long handled = 1;
struct mce_error_info mce_error_info = { 0 };
+ mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+ mce_error_info.initiator = MCE_INITIATOR_CPU;
+
srr1 = regs->msr;
nip = regs->nip;
@@ -372,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
+
+static int mce_handle_derror_p9(struct pt_regs *regs)
+{
+ uint64_t dsisr = regs->dsisr;
+
+ return mce_handle_flush_derrors(dsisr,
+ P9_DSISR_MC_SLB_PARITY_MFSLB |
+ P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
+
+ P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
+
+ P9_DSISR_MC_ERAT_MULTIHIT);
+}
+
+static int mce_handle_ierror_p9(struct pt_regs *regs)
+{
+ uint64_t srr1 = regs->msr;
+
+ switch (P9_SRR1_MC_IFETCH(srr1)) {
+ case P9_SRR1_MC_IFETCH_SLB_PARITY:
+ case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ return mce_flush(MCE_FLUSH_SLB);
+ case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ return mce_flush(MCE_FLUSH_TLB);
+ case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+ return mce_flush(MCE_FLUSH_ERAT);
+ default:
+ return 0;
+ }
+}
+
+static void mce_get_derror_p9(struct pt_regs *regs,
+ struct mce_error_info *mce_err, uint64_t *addr)
+{
+ uint64_t dsisr = regs->dsisr;
+
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
+
+ if (dsisr & P9_DSISR_MC_USER_TLBIE)
+ *addr = regs->nip;
+ else
+ *addr = regs->dar;
+
+ if (dsisr & P9_DSISR_MC_UE) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
+ } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
+ } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
+ mce_err->error_type = MCE_ERROR_TYPE_USER;
+ mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
+ } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
+ } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+ } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+ }
+}
+
+static void mce_get_ierror_p9(struct pt_regs *regs,
+ struct mce_error_info *mce_err, uint64_t *addr)
+{
+ uint64_t srr1 = regs->msr;
+
+ switch (P9_SRR1_MC_IFETCH(srr1)) {
+ case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+ case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+ mce_err->severity = MCE_SEV_FATAL;
+ break;
+ default:
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ break;
+ }
+
+ mce_err->initiator = MCE_INITIATOR_CPU;
+
+ *addr = regs->nip;
+
+ switch (P9_SRR1_MC_IFETCH(srr1)) {
+ case P9_SRR1_MC_IFETCH_UE:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_SLB_PARITY:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ break;
+ case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
+ break;
+ case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
+ break;
+ case P9_SRR1_MC_IFETCH_RA:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
+ break;
+ case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
+ break;
+ case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
+ break;
+ default:
+ break;
+ }
+}
+
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+ uint64_t nip, addr;
+ long handled;
+ struct mce_error_info mce_error_info = { 0 };
+
+ nip = regs->nip;
+
+ if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
+ handled = mce_handle_derror_p9(regs);
+ mce_get_derror_p9(regs, &mce_error_info, &addr);
+ } else {
+ handled = mce_handle_ierror_p9(regs);
+ mce_get_ierror_p9(regs, &mce_error_info, &addr);
+ }
+
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
+ return handled;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95c91a9de351..0e42aa8a279f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -524,11 +524,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
/* We support this only for PR */
r = !hv_enabled;
break;
-#ifdef CONFIG_KVM_MMIO
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
-#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC:
r = 1;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 595dd718ea87..2ff13249f87a 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
sdsync = POWER7P_MMCRA_SDAR_VALID;
else if (ppmu->flags & PPMU_ALT_SIPR)
sdsync = POWER6_MMCRA_SDSYNC;
+ else if (ppmu->flags & PPMU_NO_SIAR)
+ sdsync = MMCRA_SAMPLE_ENABLE;
else
sdsync = MMCRA_SDSYNC;
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index e79fb5fb817d..cd951fd231c4 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -65,12 +65,41 @@ static bool is_event_valid(u64 event)
return !(event & ~valid_mask);
}
-static u64 mmcra_sdar_mode(u64 event)
+static inline bool is_event_marked(u64 event)
{
- if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
- return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+ if (event & EVENT_IS_MARKED)
+ return true;
+
+ return false;
+}
- return MMCRA_SDAR_MODE_TLB;
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+ /*
+ * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+ * continuous sampling mode.
+ *
+ * In case of Power8:
+ * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
+ * mode and will be unchanged when setting MMCRA[63] (Marked events).
+ *
+ * In case of Power9:
+ * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
+ * or if the group already has any marked events.
+ * Non-Marked events (for DD1):
+ * MMCRA[SDAR_MODE] will be set to 0b01
+ * For rest
+ * MMCRA[SDAR_MODE] will be set from event code.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+ *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+ else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+ *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+ else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ *mmcra |= MMCRA_SDAR_MODE_TLB;
+ } else
+ *mmcra |= MMCRA_SDAR_MODE_TLB;
}
static u64 thresh_cmp_val(u64 value)
@@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
value |= CNST_L1_QUAL_VAL(cache);
}
- if (event & EVENT_IS_MARKED) {
+ if (is_event_marked(event)) {
mask |= CNST_SAMPLE_MASK;
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
}
@@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
}
/* In continuous sampling mode, update SDAR on TLB miss */
- mmcra |= mmcra_sdar_mode(event[i]);
+ mmcra_sdar_mode(event[i], &mmcra);
if (event[i] & EVENT_IS_L1) {
cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
@@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
}
- if (event[i] & EVENT_IS_MARKED) {
+ if (is_event_marked(event[i])) {
mmcra |= MMCRA_SAMPLE_ENABLE;
val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index cf9bd8990159..899210f14ee4 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -246,6 +246,7 @@
#define MMCRA_THR_CMP_SHIFT 32
#define MMCRA_SDAR_MODE_SHIFT 42
#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
#define MMCRA_IFM_SHIFT 30
/* MMCR1 Threshold Compare bit constant for power9 */
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 86d9fde93c17..e0f856bfbfe8 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs,
struct machine_check_event *evt)
{
int recovered = 0;
- uint64_t ea = get_mce_fault_addr(evt);
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
@@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs,
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
recovered = 1;
- } else if (ea && !is_kernel_addr(ea)) {
+ } else if (evt->severity == MCE_SEV_FATAL) {
+ /* Fatal machine check */
+ pr_err("Machine check interrupt is fatal\n");
+ recovered = 0;
+ } else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
+ (user_mode(regs) && !is_global_init(current))) {
/*
- * Faulting address is not in kernel text. We should be fine.
- * We need to find which process uses this address.
* For now, kill the task if we have received exception when
* in userspace.
*
* TODO: Queue up this address for hwpoisoning later.
*/
- if (user_mode(regs) && !is_global_init(current)) {
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
- } else
- recovered = 0;
- } else if (user_mode(regs) && !is_global_init(current) &&
- evt->severity == MCE_SEV_ERROR_SYNC) {
- /*
- * If we have received a synchronous error when in userspace
- * kill the task.
- */
_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
recovered = 1;
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6901a06da2f9..e36738291c32 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus)
+ struct pci_bus *bus,
+ bool add_to_group)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
set_dma_offset(&dev->dev, pe->tce_bypass_base);
- iommu_add_device(&dev->dev);
+ if (add_to_group)
+ iommu_add_device(&dev->dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+ add_to_group);
}
}
@@ -2191,7 +2194,7 @@ found:
set_iommu_table_base(&pe->pdev->dev, tbl);
iommu_add_device(&pe->pdev->dev);
} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
return;
fail:
@@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_set_bypass(pe, false);
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+ if (pe->pbus)
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
pnv_ioda2_table_free(tbl);
}
@@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
table_group);
pnv_pci_ioda2_setup_default_config(pe);
+ if (pe->pbus)
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2624,6 +2631,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
level_shift = entries_shift + 3;
level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+ if ((level_shift - 3) * levels + page_shift >= 60)
+ return -EINVAL;
+
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
levels, tce_table_size, &offset, &total_allocated);
@@ -2728,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
if (pe->flags & PNV_IODA_PE_DEV)
iommu_add_device(&pe->pdev->dev);
else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
}
#ifdef CONFIG_PCI_MSI
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 251060cf1713..8b1fe895daa3 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -751,7 +751,9 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
- mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+
+ if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+ mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
}
void radix_init_pseries(void)
diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S
index f9760ccf4032..3696ea6c4826 100644
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -116,13 +116,13 @@ dt_offset:
.data
.balign 8
-.globl sha256_digest
-sha256_digest:
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
.skip 32
- .size sha256_digest, . - sha256_digest
+ .size purgatory_sha256_digest, . - purgatory_sha256_digest
.balign 8
-.globl sha_regions
-sha_regions:
+.globl purgatory_sha_regions
+purgatory_sha_regions:
.skip 8 * 2 * 16
- .size sha_regions, . - sha_regions
+ .size purgatory_sha_regions, . - purgatory_sha_regions
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index d69ea495c4d7..716b17238599 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
ret = blkcipher_walk_done(desc, walk, nbytes - n);
}
if (k < n) {
- if (__ctr_paes_set_key(ctx) != 0)
+ if (__ctr_paes_set_key(ctx) != 0) {
+ if (locked)
+ spin_unlock(&ctrblk_lock);
return blkcipher_walk_done(desc, walk, -EIO);
+ }
}
}
if (locked)
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d1c407ddf703..9072bf63a846 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,31 +8,27 @@
#define _S390_CPUTIME_H
#include <linux/types.h>
-#include <asm/div64.h>
+#include <asm/timex.h>
#define CPUTIME_PER_USEC 4096ULL
#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
-
#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
- return n / base;
-}
-
/*
- * Convert cputime to microseconds and back.
+ * Convert cputime to microseconds.
*/
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline u64 cputime_to_usecs(const u64 cputime)
{
- return (__force unsigned long long) cputime >> 12;
+ return cputime >> 12;
}
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
u64 arch_cpu_idle_time(int cpu);
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1d48880b3cc1..e8f623041769 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -105,6 +105,7 @@
#define HWCAP_S390_VXRS 2048
#define HWCAP_S390_VXRS_BCD 4096
#define HWCAP_S390_VXRS_EXT 8192
+#define HWCAP_S390_GS 16384
/* Internal bits, not exposed via elf */
#define HWCAP_INT_SIE 1UL
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a41faf34b034..552c319483c6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
#include <asm/cpu.h>
#include <asm/fpu/api.h>
#include <asm/isc.h>
+#include <asm/guarded_storage.h>
#define KVM_S390_BSCA_CPU_SLOTS 64
#define KVM_S390_ESCA_CPU_SLOTS 248
@@ -164,11 +165,21 @@ struct kvm_s390_sie_block {
#define ICTL_RRBE 0x00001000
#define ICTL_TPROT 0x00000200
__u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_SII 0x00000001
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
#define ICPT_PROGI 0x08
#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
#define ICPT_VALIDITY 0x20
#define ICPT_STOP 0x28
#define ICPT_OPEREXC 0x2C
@@ -182,10 +193,19 @@ struct kvm_s390_sie_block {
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
__u8 reserved60; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
__u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
__u8 ecb2; /* 0x0062 */
-#define ECB3_AES 0x04
#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
@@ -219,11 +239,14 @@ struct kvm_s390_sie_block {
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
- __u8 reserved188[24]; /* 0x0188 */
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
__u32 fac; /* 0x01a0 */
__u8 reserved1a4[20]; /* 0x01a4 */
__u64 cbrlo; /* 0x01b8 */
__u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -498,6 +521,12 @@ struct kvm_s390_local_interrupt {
#define FIRQ_CNTR_PFAULT 3
#define FIRQ_MAX_COUNT 4
+/* mask the AIS mode for a given ISC */
+#define AIS_MODE_MASK(isc) (0x80 >> isc)
+
+#define KVM_S390_AIS_MODE_ALL 0
+#define KVM_S390_AIS_MODE_SINGLE 1
+
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
spinlock_t lock;
@@ -507,6 +536,10 @@ struct kvm_s390_float_interrupt {
struct kvm_s390_ext_info srv_signal;
int next_rr_cpu;
unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ struct mutex ais_lock;
+ u8 simm;
+ u8 nimm;
+ int ais_enabled;
};
struct kvm_hw_wp_info_arch {
@@ -554,6 +587,7 @@ struct kvm_vcpu_arch {
/* if vsie is active, currently executed shadow sie control block */
struct kvm_s390_sie_block *vsie_block;
unsigned int host_acrs[NUM_ACRS];
+ struct gs_cb *host_gscb;
struct fpu host_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
@@ -574,6 +608,7 @@ struct kvm_vcpu_arch {
*/
seqcount_t cputm_seqcount;
__u64 cputm_start;
+ bool gs_enabled;
};
struct kvm_vm_stat {
@@ -596,6 +631,7 @@ struct s390_io_adapter {
bool maskable;
bool masked;
bool swap;
+ bool suppressible;
struct rw_semaphore maps_lock;
struct list_head maps;
atomic_t nr_maps;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 61261e0e95c0..8a5b082797f8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,8 +157,8 @@ struct lowcore {
__u64 stfle_fac_list[32]; /* 0x0f00 */
__u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
- /* Pointer to vector register save area */
- __u64 vector_save_area_addr; /* 0x11b0 */
+ /* Pointer to the machine check extended save area */
+ __u64 mcesad; /* 0x11b0 */
/* 64 bit extparam used for pfault/diag 250: defined by architecture */
__u64 ext_params2; /* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
/* Transaction abort diagnostic block */
__u8 pgm_tdb[256]; /* 0x1800 */
- __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */
-
- /* Software defined save area for vector registers */
- __u8 vector_save_area[1024]; /* 0x1c00 */
+ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
} __packed;
#define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b75fd910386a..e3e8895f5d3e 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -58,7 +58,9 @@ union mci {
u64 ie : 1; /* 32 indirect storage error */
u64 ar : 1; /* 33 access register validity */
u64 da : 1; /* 34 delayed access exception */
- u64 : 7; /* 35-41 */
+ u64 : 1; /* 35 */
+ u64 gs : 1; /* 36 guarded storage registers */
+ u64 : 5; /* 37-41 */
u64 pr : 1; /* 42 tod programmable register validity */
u64 fc : 1; /* 43 fp control register validity */
u64 ap : 1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
};
};
+#define MCESA_ORIGIN_MASK (~0x3ffUL)
+#define MCESA_LC_MASK (0xfUL)
+
+struct mcesa {
+ u8 vector_save_area[1024];
+ u8 guarded_storage_save_area[32];
+};
+
struct pt_regs;
extern void s390_handle_mcck(void);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e4988710aa86..cc101f9371cb 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -135,6 +135,8 @@ struct thread_struct {
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
+ struct gs_cb *gs_cb; /* Current guarded storage cb */
+ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
+/* Free the guarded storage control blocks (current and broadcast) of current */
+void exit_thread_gs(void);
+
/*
* Return saved PC of a blocked thread.
*/
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 30bdb5a027f3..383bd8358a8c 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -31,6 +31,7 @@
#define MACHINE_FLAG_VX _BITUL(13)
#define MACHINE_FLAG_CAD _BITUL(14)
#define MACHINE_FLAG_NX _BITUL(15)
+#define MACHINE_FLAG_GS _BITUL(16)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -70,6 +71,7 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
/*
* Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0cfdd9..f6c2b5814ab0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -10,6 +10,7 @@
#include <linux/thread_info.h>
#include <asm/fpu/api.h>
#include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
extern struct task_struct *__switch_to(void *, void *);
extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
save_fpu_regs(); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
+ save_gs_cb(prev->thread.gs_cb); \
} \
if (next->mm) { \
update_cr_regs(next); \
set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ restore_gs_cb(next->thread.gs_cb); \
} \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a5b54a445eb8..f36e6e2b73f0 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
-#define TIF_SECCOMP 5 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_UPROBE 7 /* breakpointed or single-stepping */
+#define TIF_UPROBE 3 /* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
+#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
+#define TIF_SECCOMP 10 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define _TIF_UPROBE _BITUL(TIF_UPROBE)
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
+#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 354344dcc198..118535123f34 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void)
* ns = (todval * 125) >> 9;
*
* In order to avoid an overflow with the multiplication we can rewrite this.
- * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
* we end up with
*
- * ns = ((2^32 * th + tl) * 125 ) >> 9;
- * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ * ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
*
*/
static inline unsigned long long tod_to_ns(unsigned long long todval)
{
- unsigned long long ns;
-
- ns = ((todval >> 32) << 23) * 125;
- ns += ((todval & 0xffffffff) * 125) >> 9;
- return ns;
+ return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
#endif
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 6848ba5c1454..86b761e583e3 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -12,6 +12,7 @@ header-y += dasd.h
header-y += debug.h
header-y += errno.h
header-y += fcntl.h
+header-y += guarded_storage.h
header-y += hypfs.h
header-y += ioctl.h
header-y += ioctls.h
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 000000000000..852850e8e17e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,77 @@
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+
+struct gs_cb {
+ __u64 reserved;
+ __u64 gsd;
+ __u64 gssm;
+ __u64 gs_epl_a;
+};
+
+struct gs_epl {
+ __u8 pad1;
+ union {
+ __u8 gs_eam;
+ struct {
+ __u8 : 6;
+ __u8 e : 1;
+ __u8 b : 1;
+ };
+ };
+ union {
+ __u8 gs_eci;
+ struct {
+ __u8 tx : 1;
+ __u8 cx : 1;
+ __u8 : 5;
+ __u8 in : 1;
+ };
+ };
+ union {
+ __u8 gs_eai;
+ struct {
+ __u8 : 1;
+ __u8 t : 1;
+ __u8 as : 2;
+ __u8 ar : 4;
+ };
+ };
+ __u32 pad2;
+ __u64 gs_eha;
+ __u64 gs_eia;
+ __u64 gs_eoa;
+ __u64 gs_eir;
+ __u64 gs_era;
+};
+
+#define GS_ENABLE 0
+#define GS_DISABLE 1
+#define GS_SET_BC_CB 2
+#define GS_CLEAR_BC_CB 3
+#define GS_BROADCAST 4
+
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{
+ asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+ asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+ if (gs_cb)
+ store_gs_cb(gs_cb);
+}
+
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+ if (gs_cb)
+ load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index a2ffec4139ad..2c9ad251fa33 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -26,6 +26,8 @@
#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
+#define KVM_DEV_FLIC_AISM 9
+#define KVM_DEV_FLIC_AIRQ_INJECT 10
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter {
__u8 isc;
__u8 maskable;
__u8 swap;
- __u8 pad;
+ __u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+ __u8 isc;
+ __u16 mode;
};
#define KVM_S390_IO_ADAPTER_MASK 1
@@ -197,6 +206,10 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_VRS (1UL << 6)
#define KVM_SYNC_RICCB (1UL << 7)
#define KVM_SYNC_FPRS (1UL << 8)
+#define KVM_SYNC_GSCB (1UL << 9)
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -217,8 +230,16 @@ struct kvm_sync_regs {
};
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
- __u8 padding[52]; /* riccb needs to be 64byte aligned */
+ __u8 padding1[52]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
+ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ union {
+ __u8 sdnx[SDNXL]; /* state description annex */
+ struct {
+ __u64 reserved1[2];
+ __u64 gscb[4];
+ };
+ };
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 4384bc797a54..ea42290e7d51 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,9 @@
#define __NR_copy_file_range 375
#define __NR_preadv2 376
#define __NR_pwritev2 377
-#define NR_syscalls 378
+#define __NR_s390_guarded_storage 378
+#define __NR_statx 379
+#define NR_syscalls 380
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 060ce548fe8b..aa5adbdaf200 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y += runtime_instr.o cache.o fpu.o dumpstack.o
+obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
obj-y += entry.o reipl.o relocate_kernel.o
extra-y += head.o head64.o vmlinux.lds
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c4b3570ded5b..6bb29633e1f1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,7 +175,7 @@ int main(void)
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
- OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr);
+ OFFSET(__LC_MCESAD, lowcore, mcesad);
OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index ae2cda5eee5a..986642a3543b 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,3 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4e65c79cc5f2..95298a41076f 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
+ if (test_facility(133))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
}
static inline void save_vector_registers(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index dff2152350a7..fa8b8f28e08b 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_UPROBE)
+ _TIF_UPROBE | _TIF_GUARDED_STORAGE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -332,6 +332,8 @@ ENTRY(system_call)
TSTMSK __TI_flags(%r12),_TIF_UPROBE
jo .Lsysc_uprobe_notify
#endif
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lsysc_guarded_storage
TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
jo .Lsysc_singlestep
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
@@ -409,6 +411,14 @@ ENTRY(system_call)
#endif
#
+# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb
+#
+.Lsysc_guarded_storage:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg gs_load_bc_cb
+
+#
# _PIF_PER_TRAP is set, call do_per_trap
#
.Lsysc_singlestep:
@@ -490,7 +500,7 @@ ENTRY(pgm_check_handler)
jnz .Lpgm_svcper # -> single stepped svc
1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 3f
+ j 4f
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
lg %r15,__LC_KERNEL_STACK
lgr %r14,%r12
@@ -499,8 +509,8 @@ ENTRY(pgm_check_handler)
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
jz 3f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-3: la %r11,STACK_FRAME_OVERHEAD(%r15)
- stg %r10,__THREAD_last_break(%r14)
+3: stg %r10,__THREAD_last_break(%r14)
+4: la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
@@ -509,14 +519,14 @@ ENTRY(pgm_check_handler)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 4f
+ jz 5f
tmhh %r8,0x0001 # kernel per event ?
jz .Lpgm_kprobe
oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-4: REENABLE_IRQS
+5: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
@@ -663,6 +673,8 @@ ENTRY(io_int_handler)
jo .Lio_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lio_notify_resume
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lio_guarded_storage
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -697,6 +709,18 @@ ENTRY(io_int_handler)
jg load_fpu_regs
#
+# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb
+#
+.Lio_guarded_storage:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,gs_load_bc_cb
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 33f901865326..dbf5f7e18246 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
long sys_s390_personality(unsigned int personality);
long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
DECLARE_PER_CPU(u64, mt_cycles[8]);
void verify_facilities(void);
+void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000000..6f064745c3b1
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <[email protected]>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+void exit_thread_gs(void)
+{
+ kfree(current->thread.gs_cb);
+ kfree(current->thread.gs_bc_cb);
+ current->thread.gs_cb = current->thread.gs_bc_cb = NULL;
+}
+
+static int gs_enable(void)
+{
+ struct gs_cb *gs_cb;
+
+ if (!current->thread.gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ gs_cb->gsd = 25;
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_disable(void)
+{
+ if (current->thread.gs_cb) {
+ preempt_disable();
+ kfree(current->thread.gs_cb);
+ current->thread.gs_cb = NULL;
+ __ctl_clear_bit(2, 4);
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ if (!gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ current->thread.gs_bc_cb = gs_cb;
+ }
+ if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+ return -EFAULT;
+ return 0;
+}
+
+static int gs_clear_bc_cb(void)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ current->thread.gs_bc_cb = NULL;
+ kfree(gs_cb);
+ return 0;
+}
+
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+ struct gs_cb *gs_cb;
+
+ preempt_disable();
+ clear_thread_flag(TIF_GUARDED_STORAGE);
+ gs_cb = current->thread.gs_bc_cb;
+ if (gs_cb) {
+ kfree(current->thread.gs_cb);
+ current->thread.gs_bc_cb = NULL;
+ __ctl_set_bit(2, 4);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ }
+ preempt_enable();
+}
+
+static int gs_broadcast(void)
+{
+ struct task_struct *sibling;
+
+ read_lock(&tasklist_lock);
+ for_each_thread(current, sibling) {
+ if (!sibling->thread.gs_bc_cb)
+ continue;
+ if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+ kick_process(sibling);
+ }
+ read_unlock(&tasklist_lock);
+ return 0;
+}
+
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+ struct gs_cb __user *, gs_cb)
+{
+ if (!MACHINE_HAS_GS)
+ return -EOPNOTSUPP;
+ switch (command) {
+ case GS_ENABLE:
+ return gs_enable();
+ case GS_DISABLE:
+ return gs_disable();
+ case GS_SET_BC_CB:
+ return gs_set_bc_cb(gs_cb);
+ case GS_CLEAR_BC_CB:
+ return gs_clear_bc_cb();
+ case GS_BROADCAST:
+ return gs_broadcast();
+ default:
+ return -EINVAL;
+ }
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index b67dafb7b7cf..e545ffe5155a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,6 +564,8 @@ static struct kset *ipl_kset;
static void __ipl_run(void *unused)
{
+ if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
+ diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3074c1d83829..db5658daf994 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
#include <asm/cacheflush.h>
#include <asm/os_info.h>
#include <asm/switch_to.h>
+#include <asm/nmi.h>
typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
*/
static noinline void __machine_kdump(void *image)
{
+ struct mcesa *mcesa;
+ unsigned long cr2_old, cr2_new;
int this_cpu, cpu;
lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (MACHINE_HAS_VX)
- save_vx_regs((void *) &S390_lowcore.vector_save_area);
+ save_vx_regs((__vector128 *) mcesa->vector_save_area);
+ if (MACHINE_HAS_GS) {
+ __ctl_store(cr2_old, 2, 2);
+ cr2_new = cr2_old | (1UL << 4);
+ __ctl_load(cr2_new, 2, 2);
+ save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+ __ctl_load(cr2_old, 2, 2);
+ }
/*
* To create a good backchain for this CPU in the dump store_status
* is passed the address of a function. The address is saved into
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9bf8327154ee..985589523970 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
int kill_task;
u64 zero;
void *fpt_save_area;
+ struct mcesa *mcesa;
kill_task = 0;
zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
: : "Q" (S390_lowcore.fpt_creg_save_area));
}
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (!MACHINE_HAS_VX) {
/* Validate floating point registers */
asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
" la 1,%0\n"
" .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
- : : "Q" (*(struct vx_array *)
- &S390_lowcore.vector_save_area) : "1");
+ : : "Q" (*(struct vx_array *) mcesa->vector_save_area)
+ : "1");
__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
}
/* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
*/
kill_task = 1;
}
+ /* Validate guarded storage registers */
+ if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
+ if (!mci.gs)
+ /*
+ * The guarded storage registers can't be restored and
+ * the current process uses guarded storage.
+ * It has to be terminated.
+ */
+ kill_task = 1;
+ else
+ load_gs_cb((struct gs_cb *)
+ mcesa->guarded_storage_save_area);
+ }
/*
* We don't even try to validate the TOD register, since we simply
* can't write something sensible into that register.
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 20cd339e11ae..999d7154bbdc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
*/
void exit_thread(struct task_struct *tsk)
{
- if (tsk == current)
+ if (tsk == current) {
exit_thread_runtime_instr();
+ exit_thread_gs();
+ }
}
void flush_thread(void)
@@ -124,7 +126,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
/* Initialize per thread user and system timer values */
p->thread.user_timer = 0;
+ p->thread.guest_timer = 0;
p->thread.system_timer = 0;
+ p->thread.hardirq_timer = 0;
+ p->thread.softirq_timer = 0;
frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
@@ -156,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
/* Don't copy runtime instrumentation info */
p->thread.ri_cb = NULL;
frame->childregs.psw.mask &= ~PSW_MASK_RI;
+ /* Don't copy guarded storage control block */
+ p->thread.gs_cb = NULL;
+ p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 928b929a6261..c73709869447 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe"
+ "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
};
static const char * const int_hwcap_str[] = {
"sie"
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c14df0a1ec3c..c933e255b5d5 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
struct per_regs old, new;
-
+ unsigned long cr0_old, cr0_new;
+ unsigned long cr2_old, cr2_new;
+ int cr0_changed, cr2_changed;
+
+ __ctl_store(cr0_old, 0, 0);
+ __ctl_store(cr2_old, 2, 2);
+ cr0_new = cr0_old;
+ cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr, cr_new;
-
- __ctl_store(cr, 0, 0);
/* Set or clear transaction execution TXC bit 8. */
- cr_new = cr | (1UL << 55);
+ cr0_new |= (1UL << 55);
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new &= ~(1UL << 55);
- if (cr_new != cr)
- __ctl_load(cr_new, 0, 0);
+ cr0_new &= ~(1UL << 55);
/* Set or clear transaction execution TDC bits 62 and 63. */
- __ctl_store(cr, 2, 2);
- cr_new = cr & ~3UL;
+ cr2_new &= ~3UL;
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new |= 1UL;
+ cr2_new |= 1UL;
else
- cr_new |= 2UL;
+ cr2_new |= 2UL;
}
- if (cr_new != cr)
- __ctl_load(cr_new, 2, 2);
}
+ /* Take care of enable/disable of guarded storage. */
+ if (MACHINE_HAS_GS) {
+ cr2_new &= ~(1UL << 4);
+ if (task->thread.gs_cb)
+ cr2_new |= (1UL << 4);
+ }
+ /* Load control register 0/2 iff changed */
+ cr0_changed = cr0_new != cr0_old;
+ cr2_changed = cr2_new != cr2_old;
+ if (cr0_changed)
+ __ctl_load(cr0_new, 0, 0);
+ if (cr2_changed)
+ __ctl_load(cr2_new, 2, 2);
/* Copy user specified PER registers */
new.control = thread->per_user.control;
new.start = thread->per_user.start;
@@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target,
data, 0, sizeof(unsigned int));
}
+/*
+ * Regset "get" handler for NT_S390_GS_CB: copy the guarded storage control
+ * block of @target (target->thread.gs_cb) to the kernel or user buffer.
+ *
+ * Returns -ENODEV if the machine has no guarded storage facility, -ENODATA
+ * if the target has no control block, otherwise the result of
+ * user_regset_copyout().
+ *
+ * NOTE(review): the data comes from the in-memory control block as is;
+ * presumably it can be out of date when @target is the current task -
+ * confirm whether the registers need to be stored first in that case.
+ */
+static int s390_gs_cb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+/*
+ * Regset "set" handler for NT_S390_GS_CB: copy data from the kernel or
+ * user buffer into the existing guarded storage control block of @target
+ * (target->thread.gs_cb). No control block is allocated here.
+ *
+ * Returns -ENODEV if the machine has no guarded storage facility, -ENODATA
+ * if the target has no control block, otherwise the result of
+ * user_regset_copyin().
+ */
+static int s390_gs_cb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
static const struct user_regset s390_regsets[] = {
{
.core_note_type = NT_PRSTATUS,
@@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = {
.get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
};
static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_compat_regs_high_get,
.set = s390_compat_regs_high_set,
},
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
};
static const struct user_regset_view user_s390_compat_view = {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 911dc0b49be0..3ae756c0db3d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
MAX_FACILITY_BIT/8);
- if (MACHINE_HAS_VX)
- lc->vector_save_area_addr =
- (unsigned long) &lc->vector_save_area;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ unsigned long bits, size;
+
+ bits = MACHINE_HAS_GS ? 11 : 10;
+ size = 1UL << bits;
+ lc->mcesad = (__u64) memblock_virt_alloc(size, size);
+ if (MACHINE_HAS_GS)
+ lc->mcesad |= bits;
+ }
lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_S390_VXRS_BCD;
}
+ /*
+ * Guarded storage support HWCAP_S390_GS is bit 12.
+ */
+ if (MACHINE_HAS_GS)
+ elf_hwcap |= HWCAP_S390_GS;
+
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 47a973b5b4f1..286bcee800f4 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -51,6 +51,7 @@
#include <asm/os_info.h>
#include <asm/sigp.h>
#include <asm/idle.h>
+#include <asm/nmi.h>
#include "entry.h"
enum {
@@ -78,6 +79,8 @@ struct pcpu {
static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
+static struct kmem_cache *pcpu_mcesa_cache;
+
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, panic_stack;
+ unsigned long mcesa_origin, mcesa_bits;
struct lowcore *lc;
+ mcesa_origin = mcesa_bits = 0;
if (pcpu != &pcpu_devices[0]) {
pcpu->lowcore = (struct lowcore *)
__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
panic_stack = __get_free_page(GFP_KERNEL);
if (!pcpu->lowcore || !panic_stack || !async_stack)
goto out;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ mcesa_origin = (unsigned long)
+ kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
+ if (!mcesa_origin)
+ goto out;
+ mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
+ }
} else {
async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+ mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+ mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
}
lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+ lc->mcesad = mcesa_origin | mcesa_bits;
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
- if (MACHINE_HAS_VX)
- lc->vector_save_area_addr =
- (unsigned long) &lc->vector_save_area;
if (vdso_alloc_per_cpu(lc))
goto out;
lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
return 0;
out:
if (pcpu != &pcpu_devices[0]) {
+ if (mcesa_origin)
+ kmem_cache_free(pcpu_mcesa_cache,
+ (void *) mcesa_origin);
free_page(panic_stack);
free_pages(async_stack, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
+ unsigned long mcesa_origin;
+
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[pcpu - pcpu_devices] = NULL;
vdso_free_per_cpu(pcpu->lowcore);
if (pcpu == &pcpu_devices[0])
return;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+ kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
+ }
free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!MACHINE_HAS_VX)
+ if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
return 0;
- pa = __pa(pcpu->lowcore->vector_save_area_addr);
+ pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+ if (MACHINE_HAS_GS)
+ pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ unsigned long size;
+
/* request the 0x1201 emergency signal external interrupt */
if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
/* request the 0x1202 external call external interrupt */
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
+ /* create slab cache for the machine-check-extended-save-areas */
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
+ pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
+ size, size, 0, NULL);
+ if (!pcpu_mcesa_cache)
+ panic("Couldn't create nmi save area cache");
+ }
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9b59e6212d8f..54fce7b065de 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
SYSCALL(sys_preadv2,compat_sys_preadv2)
SYSCALL(sys_pwritev2,compat_sys_pwritev2)
+SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
+SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c14fc9029912..072d84ba42a3 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime)
}
static void account_system_index_scaled(struct task_struct *p,
- cputime_t cputime, cputime_t scaled,
+ u64 cputime, u64 scaled,
enum cpu_usage_stat index)
{
p->stimescaled += cputime_to_nsecs(scaled);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d55c829a5944..709aca9ceb05 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -262,7 +262,7 @@ struct aste {
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->eca & 1) {
+ if (vcpu->arch.sie_block->eca & ECA_SII) {
int rc;
read_lock(&vcpu->kvm->arch.sca_lock);
@@ -361,7 +361,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
void ipte_lock(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->eca & 1)
+ if (vcpu->arch.sie_block->eca & ECA_SII)
ipte_lock_siif(vcpu);
else
ipte_lock_simple(vcpu);
@@ -369,7 +369,7 @@ void ipte_lock(struct kvm_vcpu *vcpu)
void ipte_unlock(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->eca & 1)
+ if (vcpu->arch.sie_block->eca & ECA_SII)
ipte_unlock_siif(vcpu);
else
ipte_unlock_simple(vcpu);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 59920f96ebc0..f5378f336127 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = {
[0xb6] = kvm_s390_handle_stctl,
[0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
+ [0xe3] = kvm_s390_handle_e3,
[0xe5] = kvm_s390_handle_e5,
[0xeb] = kvm_s390_handle_eb,
};
@@ -368,8 +369,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
vcpu->arch.sie_block->ipb);
- if (vcpu->arch.sie_block->ipa == 0xb256 &&
- test_kvm_facility(vcpu->kvm, 74))
+ if (vcpu->arch.sie_block->ipa == 0xb256)
return handle_sthyi(vcpu);
if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
@@ -404,26 +404,26 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->icptcode) {
- case 0x10:
- case 0x18:
+ case ICPT_EXTREQ:
+ case ICPT_IOREQ:
return handle_noop(vcpu);
- case 0x04:
+ case ICPT_INST:
rc = handle_instruction(vcpu);
break;
- case 0x08:
+ case ICPT_PROGI:
return handle_prog(vcpu);
- case 0x14:
+ case ICPT_EXTINT:
return handle_external_interrupt(vcpu);
- case 0x1c:
+ case ICPT_WAIT:
return kvm_s390_handle_wait(vcpu);
- case 0x20:
+ case ICPT_VALIDITY:
return handle_validity(vcpu);
- case 0x28:
+ case ICPT_STOP:
return handle_stop(vcpu);
- case 0x2c:
+ case ICPT_OPEREXC:
rc = handle_operexc(vcpu);
break;
- case 0x38:
+ case ICPT_PARTEXEC:
rc = handle_partial_execution(vcpu);
break;
default:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0f8f14199734..482673e3436d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
struct kvm_s390_mchk_info *mchk)
{
unsigned long ext_sa_addr;
+ unsigned long lc;
freg_t fprs[NUM_FPRS];
union mci mci;
int rc;
@@ -420,10 +421,30 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
save_access_regs(vcpu->run->s.regs.acrs);
/* Extended save area */
- rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
- sizeof(unsigned long));
- /* Only bits 0-53 are used for address formation */
- ext_sa_addr &= ~0x3ffUL;
+ rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+ sizeof(unsigned long));
+ /* Only bits 0 through 63-LC are used for address formation */
+ lc = ext_sa_addr & MCESA_LC_MASK;
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ switch (lc) {
+ case 0:
+ case 10:
+ ext_sa_addr &= ~0x3ffUL;
+ break;
+ case 11:
+ ext_sa_addr &= ~0x7ffUL;
+ break;
+ case 12:
+ ext_sa_addr &= ~0xfffUL;
+ break;
+ default:
+ ext_sa_addr = 0;
+ break;
+ }
+ } else {
+ ext_sa_addr &= ~0x3ffUL;
+ }
+
if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs,
512))
@@ -431,6 +452,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
} else {
mci.vr = 0;
}
+ if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133)
+ && (lc == 11 || lc == 12)) {
+ if (write_guest_abs(vcpu, ext_sa_addr + 1024,
+ &vcpu->run->s.regs.gscb, 32))
+ mci.gs = 0;
+ } else {
+ mci.gs = 0;
+ }
/* General interruption information */
rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID);
@@ -1968,6 +1997,8 @@ static int register_io_adapter(struct kvm_device *dev,
adapter->maskable = adapter_info.maskable;
adapter->masked = false;
adapter->swap = adapter_info.swap;
+ adapter->suppressible = (adapter_info.flags) &
+ KVM_S390_ADAPTER_SUPPRESSIBLE;
dev->kvm->arch.adapters[adapter->id] = adapter;
return 0;
@@ -2121,6 +2152,87 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+/*
+ * KVM_DEV_FLIC_AISM handler: change the adapter interruption suppression
+ * mode of one interruption subclass (isc).
+ *
+ * The request (struct kvm_s390_ais_req) is copied from attr->addr.
+ * Returns -ENOTSUPP if AIS is not enabled for this VM, -EFAULT if the
+ * request cannot be copied from user space, -EINVAL if the isc is larger
+ * than MAX_ISC or the requested mode is unknown, and 0 on success.
+ */
+static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_req req;
+ int ret = 0;
+
+ if (!fi->ais_enabled)
+ return -ENOTSUPP;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ if (req.isc > MAX_ISC)
+ return -EINVAL;
+
+ /*
+ * Trace the transition. The "from" mode is derived from the current
+ * masks: simm and nimm bit set -> 2, only simm bit set -> SINGLE,
+ * simm bit clear -> ALL. Note that simm/nimm are read here before
+ * ais_lock is taken.
+ */
+ trace_kvm_s390_modify_ais_mode(req.isc,
+ (fi->simm & AIS_MODE_MASK(req.isc)) ?
+ (fi->nimm & AIS_MODE_MASK(req.isc)) ?
+ 2 : KVM_S390_AIS_MODE_SINGLE :
+ KVM_S390_AIS_MODE_ALL, req.mode);
+
+ mutex_lock(&fi->ais_lock);
+ switch (req.mode) {
+ case KVM_S390_AIS_MODE_ALL:
+ /* clear both mask bits of this isc */
+ fi->simm &= ~AIS_MODE_MASK(req.isc);
+ fi->nimm &= ~AIS_MODE_MASK(req.isc);
+ break;
+ case KVM_S390_AIS_MODE_SINGLE:
+ /* set the simm bit and clear the nimm bit of this isc */
+ fi->simm |= AIS_MODE_MASK(req.isc);
+ fi->nimm &= ~AIS_MODE_MASK(req.isc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ mutex_unlock(&fi->ais_lock);
+
+ return ret;
+}
+
+/*
+ * Inject an adapter I/O interrupt for @adapter into the VM, honouring the
+ * adapter interruption suppression state of the adapter's isc.
+ *
+ * If AIS is not enabled for the VM or the adapter is not suppressible, the
+ * interrupt is injected unconditionally. Otherwise, under ais_lock:
+ *  - if the nimm bit of the isc is set, the interrupt is suppressed and
+ *    0 is returned;
+ *  - else the interrupt is injected, and if that succeeded and the simm
+ *    bit of the isc is set, the nimm bit is set as well so that further
+ *    interrupts for this isc are suppressed.
+ *
+ * Returns the result of kvm_s390_inject_vm(), or 0 if suppressed.
+ */
+static int kvm_s390_inject_airq(struct kvm *kvm,
+ struct s390_io_adapter *adapter)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ .parm64 = (adapter->isc << 27) | 0x80000000,
+ };
+ int ret = 0;
+
+ if (!fi->ais_enabled || !adapter->suppressible)
+ return kvm_s390_inject_vm(kvm, &s390int);
+
+ mutex_lock(&fi->ais_lock);
+ if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
+ trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
+ goto out;
+ }
+
+ ret = kvm_s390_inject_vm(kvm, &s390int);
+ if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
+ fi->nimm |= AIS_MODE_MASK(adapter->isc);
+ /* traced as a transition from SINGLE to mode value 2 */
+ trace_kvm_s390_modify_ais_mode(adapter->isc,
+ KVM_S390_AIS_MODE_SINGLE, 2);
+ }
+out:
+ mutex_unlock(&fi->ais_lock);
+ return ret;
+}
+
+/*
+ * KVM_DEV_FLIC_AIRQ_INJECT handler: look up the I/O adapter whose id is
+ * given in attr->attr and inject an adapter interrupt for it.
+ *
+ * Returns -EINVAL if no such adapter exists, otherwise the result of
+ * kvm_s390_inject_airq().
+ */
+static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ unsigned int id = attr->attr;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter)
+ return -EINVAL;
+
+ return kvm_s390_inject_airq(kvm, adapter);
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -2157,6 +2269,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
r = clear_io_irq(dev->kvm, attr);
break;
+ case KVM_DEV_FLIC_AISM:
+ r = modify_ais_mode(dev->kvm, attr);
+ break;
+ case KVM_DEV_FLIC_AIRQ_INJECT:
+ r = flic_inject_airq(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2176,6 +2294,8 @@ static int flic_has_attr(struct kvm_device *dev,
case KVM_DEV_FLIC_ADAPTER_REGISTER:
case KVM_DEV_FLIC_ADAPTER_MODIFY:
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+ case KVM_DEV_FLIC_AISM:
+ case KVM_DEV_FLIC_AIRQ_INJECT:
return 0;
}
return -ENXIO;
@@ -2286,12 +2406,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
up_read(&adapter->maps_lock);
if ((ret > 0) && !adapter->masked) {
- struct kvm_s390_interrupt s390int = {
- .type = KVM_S390_INT_IO(1, 0, 0, 0),
- .parm = 0,
- .parm64 = (adapter->isc << 27) | 0x80000000,
- };
- ret = kvm_s390_inject_vm(kvm, &s390int);
+ ret = kvm_s390_inject_airq(kvm, adapter);
if (ret == 0)
ret = 1;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd6cd05bb6a7..11b7d6638991 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -380,6 +380,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_USER_INSTR0:
+ case KVM_CAP_S390_AIS:
r = 1;
break;
case KVM_CAP_S390_MEM_OP:
@@ -405,6 +406,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_RI:
r = test_facility(64);
break;
+ case KVM_CAP_S390_GS:
+ r = test_facility(133);
+ break;
default:
r = 0;
}
@@ -541,6 +545,34 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
r ? "(not available)" : "(success)");
break;
+ case KVM_CAP_S390_AIS:
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else {
+ set_kvm_facility(kvm->arch.model.fac_mask, 72);
+ set_kvm_facility(kvm->arch.model.fac_list, 72);
+ kvm->arch.float_int.ais_enabled = 1;
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: AIS %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_GS:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (test_facility(133)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 133);
+ set_kvm_facility(kvm->arch.model.fac_list, 133);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
+ r ? "(not available)" : "(success)");
+ break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
@@ -1498,6 +1530,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_crypto_init(kvm);
+ mutex_init(&kvm->arch.float_int.ais_lock);
+ kvm->arch.float_int.simm = 0;
+ kvm->arch.float_int.nimm = 0;
+ kvm->arch.float_int.ais_enabled = 0;
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@ -1646,7 +1682,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
- vcpu->arch.sie_block->ecb2 |= 0x04U;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
@@ -1700,7 +1736,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
vcpu->arch.sie_block->scaoh = scaoh;
vcpu->arch.sie_block->scaol = scaol;
- vcpu->arch.sie_block->ecb2 |= 0x04U;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
}
kvm->arch.sca = new_sca;
kvm->arch.use_esca = 1;
@@ -1749,6 +1785,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 133))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/
@@ -1939,8 +1977,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
- vcpu->arch.sie_block->ecb2 |= 0x80;
- vcpu->arch.sie_block->ecb2 &= ~0x08;
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
return 0;
}
@@ -1970,29 +2008,31 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
if (MACHINE_HAS_ESOP)
- vcpu->arch.sie_block->ecb |= 0x02;
+ vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
- vcpu->arch.sie_block->ecb |= 0x04;
+ vcpu->arch.sie_block->ecb |= ECB_SRSI;
if (test_kvm_facility(vcpu->kvm, 73))
- vcpu->arch.sie_block->ecb |= 0x10;
+ vcpu->arch.sie_block->ecb |= ECB_TE;
if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
- vcpu->arch.sie_block->ecb2 |= 0x08;
+ vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
if (test_kvm_facility(vcpu->kvm, 130))
- vcpu->arch.sie_block->ecb2 |= 0x20;
- vcpu->arch.sie_block->eca = 0x1002000U;
+ vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
+ vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
if (sclp.has_cei)
- vcpu->arch.sie_block->eca |= 0x80000000U;
+ vcpu->arch.sie_block->eca |= ECA_CEI;
if (sclp.has_ib)
- vcpu->arch.sie_block->eca |= 0x40000000U;
+ vcpu->arch.sie_block->eca |= ECA_IB;
if (sclp.has_siif)
- vcpu->arch.sie_block->eca |= 1;
+ vcpu->arch.sie_block->eca |= ECA_SII;
if (sclp.has_sigpif)
- vcpu->arch.sie_block->eca |= 0x10000000U;
+ vcpu->arch.sie_block->eca |= ECA_SIGPI;
if (test_kvm_facility(vcpu->kvm, 129)) {
- vcpu->arch.sie_block->eca |= 0x00020000;
- vcpu->arch.sie_block->ecd |= 0x20000000;
+ vcpu->arch.sie_block->eca |= ECA_VX;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
}
+ vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
+ | SDNXC;
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
@@ -2719,6 +2759,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
+ struct runtime_instr_cb *riccb;
+ struct gs_cb *gscb;
+
+ riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
+ gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
@@ -2747,12 +2792,24 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
* we should enable RI here instead of doing the lazy enablement.
*/
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
- test_kvm_facility(vcpu->kvm, 64)) {
- struct runtime_instr_cb *riccb =
- (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
-
- if (riccb->valid)
- vcpu->arch.sie_block->ecb3 |= 0x01;
+ test_kvm_facility(vcpu->kvm, 64) &&
+ riccb->valid &&
+ !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
+ vcpu->arch.sie_block->ecb3 |= ECB3_RI;
+ }
+ /*
+ * If userspace sets the gscb (e.g. after migration) to non-zero,
+ * we should enable GS here instead of doing the lazy enablement.
+ */
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
+ test_kvm_facility(vcpu->kvm, 133) &&
+ gscb->gssm &&
+ !vcpu->arch.gs_enabled) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
+ vcpu->arch.sie_block->ecb |= ECB_GS;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ vcpu->arch.gs_enabled = 1;
}
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
@@ -2768,6 +2825,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
kvm_run->kvm_dirty_regs = 0;
}
@@ -2794,6 +2865,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Restore will be done lazily at return */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+ if (MACHINE_HAS_GS) {
+ __ctl_set_bit(2, 4);
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ preempt_disable();
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
+ preempt_enable();
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
+ }
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index af9fa91a0c91..455124fe0647 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -25,7 +25,7 @@
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
/* Transactional Memory Execution related macros */
-#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
+#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
#define TDB_FORMAT1 1
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
@@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
int is_valid_psw(psw_t *psw);
int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu);
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 64b6a309f2c4..0ffe973535fa 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -37,7 +37,8 @@
static int handle_ri(struct kvm_vcpu *vcpu)
{
if (test_kvm_facility(vcpu->kvm, 64)) {
- vcpu->arch.sie_block->ecb3 |= 0x01;
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
+ vcpu->arch.sie_block->ecb3 |= ECB3_RI;
kvm_s390_retry_instr(vcpu);
return 0;
} else
@@ -52,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+/*
+ * Lazy enablement of guarded storage for a guest.
+ *
+ * If facility 133 is available to the guest: with preemption disabled, set
+ * bit 4 of control register 2, make the gscb area of the kvm_run structure
+ * the guarded storage control block of the current thread and restore it;
+ * then set ECB_GS and ECD_HOSTREGMGMT in the SIE block, mark GS as enabled
+ * for the vcpu and retry the intercepted instruction. Returns 0.
+ *
+ * Otherwise an operation exception is injected into the guest and the
+ * result of that injection is returned.
+ */
+static int handle_gs(struct kvm_vcpu *vcpu)
+{
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ preempt_enable();
+ vcpu->arch.sie_block->ecb |= ECB_GS;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ vcpu->arch.gs_enabled = 1;
+ kvm_s390_retry_instr(vcpu);
+ return 0;
+ } else
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+/*
+ * Intercept handler for instructions with opcode 0xe3. The low byte of the
+ * ipb field selects the instruction; codes 0x49 and 0x4d (presumably the
+ * guarded storage control block store/load instructions - confirm) are
+ * passed to handle_gs(), everything else returns -EOPNOTSUPP.
+ */
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
+{
+ int code = vcpu->arch.sie_block->ipb & 0xff;
+
+ if (code == 0x49 || code == 0x4d)
+ return handle_gs(vcpu);
+ else
+ return -EOPNOTSUPP;
+}
/* Handle SCK (SET CLOCK) interception */
static int handle_set_clock(struct kvm_vcpu *vcpu)
{
@@ -759,6 +787,7 @@ static const intercept_handler_t b2_handlers[256] = {
[0x3b] = handle_io_inst,
[0x3c] = handle_io_inst,
[0x50] = handle_ipte_interlock,
+ [0x56] = handle_sthyi,
[0x5f] = handle_io_inst,
[0x74] = handle_io_inst,
[0x76] = handle_io_inst,
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 05c98bb853cf..926b5244263e 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -404,6 +404,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
u64 code, addr, cc = 0;
struct sthyi_sctns *sctns = NULL;
+ if (!test_kvm_facility(vcpu->kvm, 74))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
/*
* STHYI requires extensive locking in the higher hypervisors
* and is very computational/memory expensive. Therefore we
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 396485bca191..78b7e847984a 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -280,6 +280,58 @@ TRACE_EVENT(kvm_s390_enable_disable_ibs,
__entry->state ? "enabling" : "disabling", __entry->id)
);
+/*
+ * Trace point for modifying ais mode for a given isc.
+ */
+TRACE_EVENT(kvm_s390_modify_ais_mode,
+ TP_PROTO(__u8 isc, __u16 from, __u16 to),
+ TP_ARGS(isc, from, to),
+
+ TP_STRUCT__entry(
+ __field(__u8, isc)
+ __field(__u16, from)
+ __field(__u16, to)
+ ),
+
+ TP_fast_assign(
+ __entry->isc = isc;
+ __entry->from = from;
+ __entry->to = to;
+ ),
+
+ TP_printk("for isc %x, modifying interruption mode from %s to %s",
+ __entry->isc,
+ (__entry->from == KVM_S390_AIS_MODE_ALL) ?
+ "ALL-Interruptions Mode" :
+ (__entry->from == KVM_S390_AIS_MODE_SINGLE) ?
+ "Single-Interruption Mode" : "No-Interruptions Mode",
+ (__entry->to == KVM_S390_AIS_MODE_ALL) ?
+ "ALL-Interruptions Mode" :
+ (__entry->to == KVM_S390_AIS_MODE_SINGLE) ?
+ "Single-Interruption Mode" : "No-Interruptions Mode")
+ );
+
+/*
+ * Trace point for suppressed adapter I/O interrupt.
+ */
+TRACE_EVENT(kvm_s390_airq_suppressed,
+ TP_PROTO(__u32 id, __u8 isc),
+ TP_ARGS(id, isc),
+
+ TP_STRUCT__entry(
+ __field(__u32, id)
+ __field(__u8, isc)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->isc = isc;
+ ),
+
+ TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)",
+ __entry->id, __entry->isc)
+ );
+
#endif /* _TRACE_KVMS390_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 5491be39776b..2fafc2be777f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -249,7 +249,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- bool had_tx = scb_s->ecb & 0x10U;
+ bool had_tx = scb_s->ecb & ECB_TE;
unsigned long new_mso = 0;
int rc;
@@ -307,34 +307,39 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ihcpu = scb_o->ihcpu;
/* MVPG and Protection Exception Interpretation are always available */
- scb_s->eca |= scb_o->eca & 0x01002000U;
+ scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
/* Host-protection-interruption introduced with ESOP */
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
- scb_s->ecb |= scb_o->ecb & 0x02U;
+ scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
/* transactional execution */
if (test_kvm_facility(vcpu->kvm, 73)) {
/* remap the prefix if tx is toggled on */
- if ((scb_o->ecb & 0x10U) && !had_tx)
+ if ((scb_o->ecb & ECB_TE) && !had_tx)
prefix_unmapped(vsie_page);
- scb_s->ecb |= scb_o->ecb & 0x10U;
+ scb_s->ecb |= scb_o->ecb & ECB_TE;
}
/* SIMD */
if (test_kvm_facility(vcpu->kvm, 129)) {
- scb_s->eca |= scb_o->eca & 0x00020000U;
- scb_s->ecd |= scb_o->ecd & 0x20000000U;
+ scb_s->eca |= scb_o->eca & ECA_VX;
+ scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
}
/* Run-time-Instrumentation */
if (test_kvm_facility(vcpu->kvm, 64))
- scb_s->ecb3 |= scb_o->ecb3 & 0x01U;
+ scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
/* Instruction Execution Prevention */
if (test_kvm_facility(vcpu->kvm, 130))
- scb_s->ecb2 |= scb_o->ecb2 & 0x20U;
+ scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
+ /* Guarded Storage */
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ scb_s->ecb |= scb_o->ecb & ECB_GS;
+ scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
+ }
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
- scb_s->eca |= scb_o->eca & 0x00000001U;
+ scb_s->eca |= scb_o->eca & ECA_SII;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
- scb_s->eca |= scb_o->eca & 0x40000000U;
+ scb_s->eca |= scb_o->eca & ECA_IB;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
- scb_s->eca |= scb_o->eca & 0x80000000U;
+ scb_s->eca |= scb_o->eca & ECA_CEI;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
@@ -406,7 +411,7 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
prefix += scb_s->mso;
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
- if (!rc && (scb_s->ecb & 0x10U))
+ if (!rc && (scb_s->ecb & ECB_TE))
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
prefix + PAGE_SIZE);
/*
@@ -496,6 +501,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
unpin_guest_page(vcpu->kvm, gpa, hpa);
scb_s->riccbd = 0;
}
+
+ hpa = scb_s->sdnxo;
+ if (hpa) {
+ gpa = scb_o->sdnxo;
+ unpin_guest_page(vcpu->kvm, gpa, hpa);
+ scb_s->sdnxo = 0;
+ }
}
/*
@@ -543,7 +555,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
gpa = scb_o->itdba & ~0xffUL;
- if (gpa && (scb_s->ecb & 0x10U)) {
+ if (gpa && (scb_s->ecb & ECB_TE)) {
if (!(gpa & ~0x1fffU)) {
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
@@ -558,8 +570,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
gpa = scb_o->gvrd & ~0x1ffUL;
- if (gpa && (scb_s->eca & 0x00020000U) &&
- !(scb_s->ecd & 0x20000000U)) {
+ if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x1310U);
goto unpin;
@@ -577,7 +588,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
gpa = scb_o->riccbd & ~0x3fUL;
- if (gpa && (scb_s->ecb3 & 0x01U)) {
+ if (gpa && (scb_s->ecb3 & ECB3_RI)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0043U);
goto unpin;
@@ -591,6 +602,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
goto unpin;
scb_s->riccbd = hpa;
}
+ if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+ unsigned long sdnxc;
+
+ gpa = scb_o->sdnxo & ~0xfUL;
+ sdnxc = scb_o->sdnxo & 0xfUL;
+ if (!gpa || !(gpa & ~0x1fffUL)) {
+ rc = set_validity_icpt(scb_s, 0x10b0U);
+ goto unpin;
+ }
+ if (sdnxc < 6 || sdnxc > 12) {
+ rc = set_validity_icpt(scb_s, 0x10b1U);
+ goto unpin;
+ }
+ if (gpa & ((1 << sdnxc) - 1)) {
+ rc = set_validity_icpt(scb_s, 0x10b2U);
+ goto unpin;
+ }
+ /* Due to alignment rules (checked above) this cannot
+ * cross page boundaries
+ */
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc == -EINVAL)
+ rc = set_validity_icpt(scb_s, 0x10b0U);
+ if (rc)
+ goto unpin;
+ scb_s->sdnxo = hpa;
+ }
return 0;
unpin:
unpin_blocks(vcpu, vsie_page);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b48dc5f1900b..463e5ef02304 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
pgste_t pgste;
pte_t *ptep;
pte_t pte;
bool dirty;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return false;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return false;
+ /* We can't run guests backed by huge pages, but userspace can
+ * still set them up and then try to migrate them without any
+ * migration support.
+ */
+ if (pmd_large(*pmd))
+ return true;
+
+ ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (unlikely(!ptep))
return false;
diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c
index e359ec675869..12daf45369b4 100644
--- a/arch/score/kernel/traps.c
+++ b/arch/score/kernel/traps.c
@@ -24,6 +24,7 @@
*/
#include <linux/extable.h>
+#include <linux/ptrace.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c
index ec871355fc2d..6736a3ad6286 100644
--- a/arch/score/mm/extable.c
+++ b/arch/score/mm/extable.c
@@ -24,6 +24,8 @@
*/
#include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <asm/extable.h>
int fixup_exception(struct pt_regs *regs)
{
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 349d4d17aa7f..2aa1ad194db2 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2101,8 +2101,8 @@ static int x86_pmu_event_init(struct perf_event *event)
static void refresh_pce(void *ignored)
{
- if (current->mm)
- load_mm_cr4(current->mm);
+ if (current->active_mm)
+ load_mm_cr4(current->active_mm);
}
static void x86_pmu_event_mapped(struct perf_event *event)
@@ -2110,6 +2110,18 @@ static void x86_pmu_event_mapped(struct perf_event *event)
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
+ /*
+ * This function relies on not being called concurrently in two
+ * tasks in the same mm. Otherwise one task could observe
+ * perf_rdpmc_allowed > 1 and return all the way back to
+ * userspace with CR4.PCE clear while another task is still
+ * doing on_each_cpu_mask() to propagate CR4.PCE.
+ *
+ * For now, this can't happen because all callers hold mmap_sem
+ * for write. If this changes, we'll need a different solution.
+ */
+ lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+
if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74ef58c8ff53..2cc5ec7cc6f5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,8 +43,6 @@
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
-#define KVM_PIO_PAGE_OFFSET 1
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define KVM_HALT_POLL_NS_DEFAULT 400000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
@@ -343,9 +341,10 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
- int root_level;
- int shadow_root_level;
union kvm_mmu_page_role base_role;
+ u8 root_level;
+ u8 shadow_root_level;
+ u8 ept_ad;
bool direct_map;
/*
@@ -727,6 +726,7 @@ struct kvm_hv {
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
+ KVM_IRQCHIP_INIT_IN_PROGRESS, /* temporarily set during creation */
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index d74747b031ec..c4eda791f877 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node {
};
void kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 72277b1028a5..50d35e3185f5 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
*(tmp + 1) = 0;
}
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
- defined(CONFIG_PARAVIRT))
static inline void native_pud_clear(pud_t *pudp)
{
}
-#endif
static inline void pud_clear(pud_t *pudp)
{
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1cfb36b8c024..585ee0d42d18 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
# define set_pud(pudp, pud) native_set_pud(pudp, pud)
#endif
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
#define pud_clear(pud) native_pud_clear(pud)
#endif
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
new file mode 100644
index 000000000000..d7da2729903d
--- /dev/null
+++ b/arch/x86/include/asm/purgatory.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_PURGATORY_H
+#define _ASM_X86_PURGATORY_H
+
+#ifndef __ASSEMBLY__
+#include <linux/purgatory.h>
+
+extern void purgatory(void);
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ * lookup in kexec works
+ */
+extern unsigned long purgatory_backup_dest;
+extern unsigned long purgatory_backup_src;
+extern unsigned long purgatory_backup_sz;
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_PURGATORY_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index ff4923a19f79..75d002bdb3f3 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -198,7 +198,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
static inline void __flush_tlb_all(void)
{
- if (static_cpu_has(X86_FEATURE_PGE))
+ if (boot_cpu_has(X86_FEATURE_PGE))
__flush_tlb_global();
else
__flush_tlb();
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cc54b7026567..35cd06f636ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -70,8 +70,10 @@
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
@@ -516,12 +518,14 @@ struct vmx_msr_entry {
#define EPT_VIOLATION_READABLE_BIT 3
#define EPT_VIOLATION_WRITABLE_BIT 4
#define EPT_VIOLATION_EXECUTABLE_BIT 5
+#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
/*
* VM-instruction error numbers
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c594022..c2824d02ba37 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,9 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+
#define DE_VECTOR 0
#define DB_VECTOR 1
#define BP_VECTOR 3
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 14458658e988..690a2dcf4078 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -76,7 +76,11 @@
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_RDRAND 57
#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_VMFUNC 59
+#define EXIT_REASON_ENCLS 60
+#define EXIT_REASON_RDSEED 61
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
@@ -90,6 +94,7 @@
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
{ EXIT_REASON_CPUID, "CPUID" }, \
{ EXIT_REASON_HLT, "HLT" }, \
+ { EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVLPG, "INVLPG" }, \
{ EXIT_REASON_RDPMC, "RDPMC" }, \
{ EXIT_REASON_RDTSC, "RDTSC" }, \
@@ -108,6 +113,8 @@
{ EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
{ EXIT_REASON_MSR_READ, "MSR_READ" }, \
{ EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
+ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
+ { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
{ EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
{ EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
{ EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
@@ -115,20 +122,24 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
- { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
- { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
+ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
+ { EXIT_REASON_RDTSCP, "RDTSCP" }, \
{ EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_WBINVD, "WBINVD" }, \
+ { EXIT_REASON_XSETBV, "XSETBV" }, \
{ EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
- { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
- { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
- { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
- { EXIT_REASON_INVD, "INVD" }, \
- { EXIT_REASON_INVVPID, "INVVPID" }, \
+ { EXIT_REASON_RDRAND, "RDRAND" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
+ { EXIT_REASON_VMFUNC, "VMFUNC" }, \
+ { EXIT_REASON_ENCLS, "ENCLS" }, \
+ { EXIT_REASON_RDSEED, "RDSEED" }, \
+ { EXIT_REASON_PML_FULL, "PML_FULL" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae32838cac5f..b2879cc23db4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
return -EINVAL;
}
+ if (!enabled) {
+ ++disabled_cpus;
+ return -EINVAL;
+ }
+
if (boot_cpu_physical_apicid != -1U)
ver = boot_cpu_apic_version;
- cpu = __generic_processor_info(id, ver, enabled);
+ cpu = generic_processor_info(id, ver);
if (cpu >= 0)
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
@@ -710,7 +715,7 @@ static void __init acpi_set_irq_model_ioapic(void)
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index aee7deddabd0..8ccb7ef512e0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid)
return nr_logical_cpuids++;
}
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
{
int cpu, max = nr_cpu_ids;
bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
if (num_processors >= nr_cpu_ids) {
int thiscpu = max + disabled_cpus;
- if (enabled) {
- pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
- "reached. Processor %d/0x%x ignored.\n",
- max, thiscpu, apicid);
- }
+ pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+ "reached. Processor %d/0x%x ignored.\n",
+ max, thiscpu, apicid);
disabled_cpus++;
return -EINVAL;
@@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
apic->x86_32_early_logical_apicid(cpu);
#endif
set_cpu_possible(cpu, true);
-
- if (enabled) {
- num_processors++;
- physid_set(apicid, phys_cpu_present_map);
- set_cpu_present(cpu, true);
- } else {
- disabled_cpus++;
- }
+ physid_set(apicid, phys_cpu_present_map);
+ set_cpu_present(cpu, true);
+ num_processors++;
return cpu;
}
-int generic_processor_info(int apicid, int version)
-{
- return __generic_processor_info(apicid, version, true);
-}
-
int hard_smp_processor_id(void)
{
return read_apic_id();
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index c05509d38b1f..9ac2a5cdd9c2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
(rdtgrp->flags & RDT_DELETED)) {
kernfs_unbreak_active_protection(kn);
- kernfs_put(kn);
+ kernfs_put(rdtgrp->kn);
kfree(rdtgrp);
} else {
kernfs_unbreak_active_protection(kn);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 54a2372f5dbb..b5785c197e53 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -4,6 +4,7 @@
* Copyright (C) 2000 Andrea Arcangeli <[email protected]> SuSE
*/
+#define DISABLE_BRANCH_PROFILING
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 14f65a5f938e..da5c09789984 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu)
src = &per_cpu(steal_time, cpu);
do {
version = src->version;
- rmb();
+ virt_rmb();
steal = src->steal;
- rmb();
+ virt_rmb();
} while ((version & 1) || (version != src->version));
return steal;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 307b1f4543de..857cdbd02867 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image)
/* Setup copying of backup region */
if (image->type == KEXEC_TYPE_CRASH) {
- ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+ ret = kexec_purgatory_get_set_symbol(image,
+ "purgatory_backup_dest",
&image->arch.backup_load_addr,
sizeof(image->arch.backup_load_addr), 0);
if (ret)
return ret;
- ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+ ret = kexec_purgatory_get_set_symbol(image,
+ "purgatory_backup_src",
&image->arch.backup_src_start,
sizeof(image->arch.backup_src_start), 0);
if (ret)
return ret;
- ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+ ret = kexec_purgatory_get_set_symbol(image,
+ "purgatory_backup_sz",
&image->arch.backup_src_sz,
sizeof(image->arch.backup_src_sz), 0);
if (ret)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index f088ea4c66e7..a723ae9440ab 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
spin_lock_irqsave(&desc->lock, flags);
/*
- * most handlers of type NMI_UNKNOWN never return because
- * they just assume the NMI is theirs. Just a sanity check
- * to manage expectations
+ * Indicate if there are multiple registrations on the
+ * internal NMI handler call chains (SERR and IO_CHECK).
*/
- WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4194d6f9bb29..067f9813fd2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
.ident = "ASUS EeeBook X205TA",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
},
},
{ /* Handle problems with rebooting on ASUS EeeBook X205TAW */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4f7a9833d8e5..c73a7f9e881a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void)
* the refined calibration and directly register it as a clocksource.
*/
if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
+ if (boot_cpu_has(X86_FEATURE_ART))
+ art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
return 0;
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 478d15dbaee4..08339262b666 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs)
return sizeof(*regs);
}
+#ifdef CONFIG_X86_32
+#define GCC_REALIGN_WORDS 3
+#else
+#define GCC_REALIGN_WORDS 1
+#endif
+
static bool is_last_task_frame(struct unwind_state *state)
{
- unsigned long bp = (unsigned long)state->bp;
- unsigned long regs = (unsigned long)task_pt_regs(state->task);
+ unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
+ unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
/*
* We have to check for the last task frame at two different locations
* because gcc can occasionally decide to realign the stack pointer and
- * change the offset of the stack frame by a word in the prologue of a
- * function called by head/entry code.
+ * change the offset of the stack frame in the prologue of a function
+ * called by head/entry code. Examples:
+ *
+ * <start_secondary>:
+ * push %edi
+ * lea 0x8(%esp),%edi
+ * and $0xfffffff8,%esp
+ * pushl -0x4(%edi)
+ * push %ebp
+ * mov %esp,%ebp
+ *
+ * <x86_64_start_kernel>:
+ * lea 0x8(%rsp),%r10
+ * and $0xfffffffffffffff0,%rsp
+ * pushq -0x8(%r10)
+ * push %rbp
+ * mov %rsp,%rbp
+ *
+ * Note that after aligning the stack, it pushes a duplicate copy of
+ * the return address before pushing the frame pointer.
*/
- return bp == regs - FRAME_HEADER_SIZE ||
- bp == regs - FRAME_HEADER_SIZE - sizeof(long);
+ return (state->bp == last_bp ||
+ (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
}
/*
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ab8e32f7b9a8..760433b2574a 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,18 +86,6 @@ config KVM_MMU_AUDIT
This option adds a R/W KVM module parameter 'mmu_audit', which allows
auditing of KVM MMU events at runtime.
-config KVM_DEVICE_ASSIGNMENT
- bool "KVM legacy PCI device assignment support (DEPRECATED)"
- depends on KVM && PCI && IOMMU_API
- default n
- ---help---
- Provide support for legacy PCI device assignment through KVM. The
- kernel now also supports a full featured userspace device driver
- framework through VFIO, which supersedes this support and provides
- better security.
-
- If unsure, say N.
-
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3bff20710471..09d4b17be022 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -15,8 +15,6 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o page_track.o debugfs.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
-
kvm-intel-y += vmx.o pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
deleted file mode 100644
index 308b8597c691..000000000000
--- a/arch/x86/kvm/assigned-dev.c
+++ /dev/null
@@ -1,1058 +0,0 @@
-/*
- * Kernel-based Virtual Machine - device assignment support
- *
- * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/namei.h>
-#include <linux/fs.h>
-#include "irq.h"
-#include "assigned-dev.h"
-#include "trace/events/kvm.h"
-
-struct kvm_assigned_dev_kernel {
- struct kvm_irq_ack_notifier ack_notifier;
- struct list_head list;
- int assigned_dev_id;
- int host_segnr;
- int host_busnr;
- int host_devfn;
- unsigned int entries_nr;
- int host_irq;
- bool host_irq_disabled;
- bool pci_2_3;
- struct msix_entry *host_msix_entries;
- int guest_irq;
- struct msix_entry *guest_msix_entries;
- unsigned long irq_requested_type;
- int irq_source_id;
- int flags;
- struct pci_dev *dev;
- struct kvm *kvm;
- spinlock_t intx_lock;
- spinlock_t intx_mask_lock;
- char irq_name[32];
- struct pci_saved_state *pci_saved_state;
-};
-
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
- int assigned_dev_id)
-{
- struct kvm_assigned_dev_kernel *match;
-
- list_for_each_entry(match, head, list) {
- if (match->assigned_dev_id == assigned_dev_id)
- return match;
- }
- return NULL;
-}
-
-static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
- *assigned_dev, int irq)
-{
- int i, index;
- struct msix_entry *host_msix_entries;
-
- host_msix_entries = assigned_dev->host_msix_entries;
-
- index = -1;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- if (irq == host_msix_entries[i].vector) {
- index = i;
- break;
- }
- if (index < 0)
- printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-
- return index;
-}
-
-static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret;
-
- spin_lock(&assigned_dev->intx_lock);
- if (pci_check_and_mask_intx(assigned_dev->dev)) {
- assigned_dev->host_irq_disabled = true;
- ret = IRQ_WAKE_THREAD;
- } else
- ret = IRQ_NONE;
- spin_unlock(&assigned_dev->intx_lock);
-
- return ret;
-}
-
-static void
-kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
- int vector)
-{
- if (unlikely(assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_GUEST_INTX)) {
- spin_lock(&assigned_dev->intx_mask_lock);
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id, vector, 1,
- false);
- spin_unlock(&assigned_dev->intx_mask_lock);
- } else
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- vector, 1, false);
-}
-
-static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- disable_irq_nosync(irq);
- assigned_dev->host_irq_disabled = true;
- spin_unlock_irq(&assigned_dev->intx_lock);
- }
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- * Other values - No need to retry.
- */
-static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
- int level)
-{
- struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
- struct kvm_kernel_irq_routing_entry *e;
- int ret = -EINVAL;
- int idx;
-
- trace_kvm_set_irq(irq, level, irq_source_id);
-
- /*
- * Injection into either PIC or IOAPIC might need to scan all CPUs,
- * which would need to be retried from thread context; when same GSI
- * is connected to both PIC and IOAPIC, we'd have to report a
- * partial failure here.
- * Since there's no easy way to do this, we only support injecting MSI
- * which is limited to 1:1 GSI mapping.
- */
- idx = srcu_read_lock(&kvm->irq_srcu);
- if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
- e = &entries[0];
- ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
- irq, level);
- }
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
-
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
- int ret = 0;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- vector, 1);
- }
-
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
- }
-
- return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_assigned_dev_kernel *dev =
- container_of(kian, struct kvm_assigned_dev_kernel,
- ack_notifier);
-
- kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
-
- spin_lock(&dev->intx_mask_lock);
-
- if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
- bool reassert = false;
-
- spin_lock_irq(&dev->intx_lock);
- /*
- * The guest IRQ may be shared so this ack can come from an
- * IRQ for another guest device.
- */
- if (dev->host_irq_disabled) {
- if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
- enable_irq(dev->host_irq);
- else if (!pci_check_and_unmask_intx(dev->dev))
- reassert = true;
- dev->host_irq_disabled = reassert;
- }
- spin_unlock_irq(&dev->intx_lock);
-
- if (reassert)
- kvm_set_irq(dev->kvm, dev->irq_source_id,
- dev->guest_irq, 1, false);
- }
-
- spin_unlock(&dev->intx_mask_lock);
-}
-
-static void deassign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- if (assigned_dev->ack_notifier.gsi != -1)
- kvm_unregister_irq_ack_notifier(kvm,
- &assigned_dev->ack_notifier);
-
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 0, false);
-
- if (assigned_dev->irq_source_id != -1)
- kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
- assigned_dev->irq_source_id = -1;
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
-}
-
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void deassign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- /*
- * We disable irq here to prevent further events.
- *
- * Notice this maybe result in nested disable if the interrupt type is
- * INTx, but it's OK for we are going to free it.
- *
- * If this function is a part of VM destroy, please ensure that till
- * now, the kvm state is still legal for probably we also have to wait
- * on a currently running IRQ handler.
- */
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
- int i;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- disable_irq(assigned_dev->host_msix_entries[i].vector);
-
- for (i = 0; i < assigned_dev->entries_nr; i++)
- free_irq(assigned_dev->host_msix_entries[i].vector,
- assigned_dev);
-
- assigned_dev->entries_nr = 0;
- kfree(assigned_dev->host_msix_entries);
- kfree(assigned_dev->guest_msix_entries);
- pci_disable_msix(assigned_dev->dev);
- } else {
- /* Deal with MSI and INTx */
- if ((assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_HOST_INTX) &&
- (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- pci_intx(assigned_dev->dev, false);
- spin_unlock_irq(&assigned_dev->intx_lock);
- synchronize_irq(assigned_dev->host_irq);
- } else
- disable_irq(assigned_dev->host_irq);
-
- free_irq(assigned_dev->host_irq, assigned_dev);
-
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
- pci_disable_msi(assigned_dev->dev);
- }
-
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
-}
-
-static int kvm_deassign_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev,
- unsigned long irq_requested_type)
-{
- unsigned long guest_irq_type, host_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return -EINVAL;
- /* no irq assignment to deassign */
- if (!assigned_dev->irq_requested_type)
- return -ENXIO;
-
- host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
- guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
-
- if (host_irq_type)
- deassign_host_irq(kvm, assigned_dev);
- if (guest_irq_type)
- deassign_guest_irq(kvm, assigned_dev);
-
- return 0;
-}
-
-static void kvm_free_assigned_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
-{
- kvm_free_assigned_irq(kvm, assigned_dev);
-
- pci_reset_function(assigned_dev->dev);
- if (pci_load_and_free_saved_state(assigned_dev->dev,
- &assigned_dev->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&assigned_dev->dev->dev));
- else
- pci_restore_state(assigned_dev->dev);
-
- pci_clear_dev_assigned(assigned_dev->dev);
-
- pci_release_regions(assigned_dev->dev);
- pci_disable_device(assigned_dev->dev);
- pci_dev_put(assigned_dev->dev);
-
- list_del(&assigned_dev->list);
- kfree(assigned_dev);
-}
-
-void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
- struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
-
- list_for_each_entry_safe(assigned_dev, tmp,
- &kvm->arch.assigned_dev_head, list) {
- kvm_free_assigned_device(kvm, assigned_dev);
- }
-}
-
-static int assigned_device_enable_host_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- irq_handler_t irq_handler;
- unsigned long flags;
-
- dev->host_irq = dev->dev->irq;
-
- /*
- * We can only share the IRQ line with other host devices if we are
- * able to disable the IRQ source at device-level - independently of
- * the guest driver. Otherwise host devices may suffer from unbounded
- * IRQ latencies when the guest keeps the line asserted.
- */
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- irq_handler = kvm_assigned_dev_intx;
- flags = IRQF_SHARED;
- } else {
- irq_handler = NULL;
- flags = IRQF_ONESHOT;
- }
- if (request_threaded_irq(dev->host_irq, irq_handler,
- kvm_assigned_dev_thread_intx, flags,
- dev->irq_name, dev))
- return -EIO;
-
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- spin_lock_irq(&dev->intx_lock);
- pci_intx(dev->dev, true);
- spin_unlock_irq(&dev->intx_lock);
- }
- return 0;
-}
-
-static int assigned_device_enable_host_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int r;
-
- if (!dev->dev->msi_enabled) {
- r = pci_enable_msi(dev->dev);
- if (r)
- return r;
- }
-
- dev->host_irq = dev->dev->irq;
- if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
- kvm_assigned_dev_thread_msi, 0,
- dev->irq_name, dev)) {
- pci_disable_msi(dev->dev);
- return -EIO;
- }
-
- return 0;
-}
-
-static int assigned_device_enable_host_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int i, r = -EINVAL;
-
- /* host_msix_entries and guest_msix_entries should have been
- * initialized */
- if (dev->entries_nr == 0)
- return r;
-
- r = pci_enable_msix_exact(dev->dev,
- dev->host_msix_entries, dev->entries_nr);
- if (r)
- return r;
-
- for (i = 0; i < dev->entries_nr; i++) {
- r = request_threaded_irq(dev->host_msix_entries[i].vector,
- kvm_assigned_dev_msix,
- kvm_assigned_dev_thread_msix,
- 0, dev->irq_name, dev);
- if (r)
- goto err;
- }
-
- return 0;
-err:
- for (i -= 1; i >= 0; i--)
- free_irq(dev->host_msix_entries[i].vector, dev);
- pci_disable_msix(dev->dev);
- return r;
-}
-
-static int assigned_device_enable_guest_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = irq->guest_irq;
- return 0;
-}
-
-static int assigned_device_enable_guest_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assigned_device_enable_guest_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- __u32 host_irq_type)
-{
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
- return r;
-
- snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
- pci_name(dev->dev));
-
- switch (host_irq_type) {
- case KVM_DEV_IRQ_HOST_INTX:
- r = assigned_device_enable_host_intx(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSI:
- r = assigned_device_enable_host_msi(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSIX:
- r = assigned_device_enable_host_msix(kvm, dev);
- break;
- default:
- r = -EINVAL;
- }
- dev->host_irq_disabled = false;
-
- if (!r)
- dev->irq_requested_type |= host_irq_type;
-
- return r;
-}
-
-static int assign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq,
- unsigned long guest_irq_type)
-{
- int id;
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
- return r;
-
- id = kvm_request_irq_source_id(kvm);
- if (id < 0)
- return id;
-
- dev->irq_source_id = id;
-
- switch (guest_irq_type) {
- case KVM_DEV_IRQ_GUEST_INTX:
- r = assigned_device_enable_guest_intx(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSI:
- r = assigned_device_enable_guest_msi(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSIX:
- r = assigned_device_enable_guest_msix(kvm, dev, irq);
- break;
- default:
- r = -EINVAL;
- }
-
- if (!r) {
- dev->irq_requested_type |= guest_irq_type;
- if (dev->ack_notifier.gsi != -1)
- kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
- } else {
- kvm_free_irq_source_id(kvm, dev->irq_source_id);
- dev->irq_source_id = -1;
- }
-
- return r;
-}
-
-/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
- struct kvm_assigned_irq *assigned_irq)
-{
- int r = -EINVAL;
- struct kvm_assigned_dev_kernel *match;
- unsigned long host_irq_type, guest_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return r;
-
- mutex_lock(&kvm->lock);
- r = -ENODEV;
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
- guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
-
- r = -EINVAL;
- /* can only assign one type at a time */
- if (hweight_long(host_irq_type) > 1)
- goto out;
- if (hweight_long(guest_irq_type) > 1)
- goto out;
- if (host_irq_type == 0 && guest_irq_type == 0)
- goto out;
-
- r = 0;
- if (host_irq_type)
- r = assign_host_irq(kvm, match, host_irq_type);
- if (r)
- goto out;
-
- if (guest_irq_type)
- r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
- struct kvm_assigned_irq
- *assigned_irq)
-{
- int r = -ENODEV;
- struct kvm_assigned_dev_kernel *match;
- unsigned long irq_type;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
- KVM_DEV_IRQ_GUEST_MASK);
- r = kvm_deassign_irq(kvm, match, irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-/*
- * We want to test whether the caller has been granted permissions to
- * use this device. To be able to configure and control the device,
- * the user needs access to PCI configuration space and BAR resources.
- * These are accessed through PCI sysfs. PCI config space is often
- * passed to the process calling this ioctl via file descriptor, so we
- * can't rely on access to that file. We can check for permissions
- * on each of the BAR resource files, which is a pretty clear
- * indicator that the user has been granted access to the device.
- */
-static int probe_sysfs_permissions(struct pci_dev *dev)
-{
-#ifdef CONFIG_SYSFS
- int i;
- bool bar_found = false;
-
- for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
- char *kpath, *syspath;
- struct path path;
- struct inode *inode;
- int r;
-
- if (!pci_resource_len(dev, i))
- continue;
-
- kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
- if (!kpath)
- return -ENOMEM;
-
- /* Per sysfs-rules, sysfs is always at /sys */
- syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
- kfree(kpath);
- if (!syspath)
- return -ENOMEM;
-
- r = kern_path(syspath, LOOKUP_FOLLOW, &path);
- kfree(syspath);
- if (r)
- return r;
-
- inode = d_backing_inode(path.dentry);
-
- r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
- path_put(&path);
- if (r)
- return r;
-
- bar_found = true;
- }
-
- /* If no resources, probably something special */
- if (!bar_found)
- return -EPERM;
-
- return 0;
-#else
- return -EINVAL; /* No way to control the device without sysfs */
-#endif
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0, idx;
- struct kvm_assigned_dev_kernel *match;
- struct pci_dev *dev;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- idx = srcu_read_lock(&kvm->srcu);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (match) {
- /* device already assigned */
- r = -EEXIST;
- goto out;
- }
-
- match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
- if (match == NULL) {
- printk(KERN_INFO "%s: Couldn't allocate memory\n",
- __func__);
- r = -ENOMEM;
- goto out;
- }
- dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
- assigned_dev->busnr,
- assigned_dev->devfn);
- if (!dev) {
- printk(KERN_INFO "%s: host device not found\n", __func__);
- r = -EINVAL;
- goto out_free;
- }
-
- /* Don't allow bridges to be assigned */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
- r = -EPERM;
- goto out_put;
- }
-
- r = probe_sysfs_permissions(dev);
- if (r)
- goto out_put;
-
- if (pci_enable_device(dev)) {
- printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
- r = -EBUSY;
- goto out_put;
- }
- r = pci_request_regions(dev, "kvm_assigned_device");
- if (r) {
- printk(KERN_INFO "%s: Could not get access to device regions\n",
- __func__);
- goto out_disable;
- }
-
- pci_reset_function(dev);
- pci_save_state(dev);
- match->pci_saved_state = pci_store_saved_state(dev);
- if (!match->pci_saved_state)
- printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
- __func__, dev_name(&dev->dev));
-
- if (!pci_intx_mask_supported(dev))
- assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
-
- match->assigned_dev_id = assigned_dev->assigned_dev_id;
- match->host_segnr = assigned_dev->segnr;
- match->host_busnr = assigned_dev->busnr;
- match->host_devfn = assigned_dev->devfn;
- match->flags = assigned_dev->flags;
- match->dev = dev;
- spin_lock_init(&match->intx_lock);
- spin_lock_init(&match->intx_mask_lock);
- match->irq_source_id = -1;
- match->kvm = kvm;
- match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-
- list_add(&match->list, &kvm->arch.assigned_dev_head);
-
- if (!kvm->arch.iommu_domain) {
- r = kvm_iommu_map_guest(kvm);
- if (r)
- goto out_list_del;
- }
- r = kvm_assign_device(kvm, match->dev);
- if (r)
- goto out_list_del;
-
-out:
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-out_list_del:
- if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&dev->dev));
- list_del(&match->list);
- pci_release_regions(dev);
-out_disable:
- pci_disable_device(dev);
-out_put:
- pci_dev_put(dev);
-out_free:
- kfree(match);
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- printk(KERN_INFO "%s: device hasn't been assigned before, "
- "so cannot be deassigned\n", __func__);
- r = -EINVAL;
- goto out;
- }
-
- kvm_deassign_device(kvm, match->dev);
-
- kvm_free_assigned_device(kvm, match);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-
-static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
- struct kvm_assigned_msix_nr *entry_nr)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry_nr->assigned_dev_id);
- if (!adev) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- if (adev->entries_nr == 0) {
- adev->entries_nr = entry_nr->entry_nr;
- if (adev->entries_nr == 0 ||
- adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
- entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->host_msix_entries) {
- r = -ENOMEM;
- goto msix_nr_out;
- }
- adev->guest_msix_entries =
- kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->guest_msix_entries) {
- kfree(adev->host_msix_entries);
- r = -ENOMEM;
- goto msix_nr_out;
- }
- } else /* Not allowed set MSI-X number twice */
- r = -EINVAL;
-msix_nr_out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
- struct kvm_assigned_msix_entry *entry)
-{
- int r = 0, i;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry->assigned_dev_id);
-
- if (!adev) {
- r = -EINVAL;
- goto msix_entry_out;
- }
-
- for (i = 0; i < adev->entries_nr; i++)
- if (adev->guest_msix_entries[i].vector == 0 ||
- adev->guest_msix_entries[i].entry == entry->entry) {
- adev->guest_msix_entries[i].entry = entry->entry;
- adev->guest_msix_entries[i].vector = entry->gsi;
- adev->host_msix_entries[i].entry = entry->entry;
- break;
- }
- if (i == adev->entries_nr) {
- r = -ENOSPC;
- goto msix_entry_out;
- }
-
-msix_entry_out:
- mutex_unlock(&kvm->lock);
-
- return r;
-}
-
-static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- r = -ENODEV;
- goto out;
- }
-
- spin_lock(&match->intx_mask_lock);
-
- match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
- match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
-
- if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
- if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
- kvm_set_irq(match->kvm, match->irq_source_id,
- match->guest_irq, 0, false);
- /*
- * Masking at hardware-level is performed on demand,
- * i.e. when an IRQ actually arrives at the host.
- */
- } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- /*
- * Unmask the IRQ line if required. Unmasking at
- * device level will be performed by user space.
- */
- spin_lock_irq(&match->intx_lock);
- if (match->host_irq_disabled) {
- enable_irq(match->host_irq);
- match->host_irq_disabled = false;
- }
- spin_unlock_irq(&match->intx_lock);
- }
- }
-
- spin_unlock(&match->intx_mask_lock);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- int r;
-
- switch (ioctl) {
- case KVM_ASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_IRQ: {
- r = -EOPNOTSUPP;
- break;
- }
- case KVM_ASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_NR: {
- struct kvm_assigned_msix_nr entry_nr;
- r = -EFAULT;
- if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
- goto out;
- r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_ENTRY: {
- struct kvm_assigned_msix_entry entry;
- r = -EFAULT;
- if (copy_from_user(&entry, argp, sizeof entry))
- goto out;
- r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_INTX_MASK: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
- break;
- }
- default:
- r = -ENOTTY;
- break;
- }
-out:
- return r;
-}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
deleted file mode 100644
index a428c1a211b2..000000000000
--- a/arch/x86/kvm/assigned-dev.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
-#define ARCH_X86_KVM_ASSIGNED_DEV_H
-
-#include <linux/kvm_host.h>
-
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
-
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-#else
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
-
-#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 73ea24d4f119..bdcd4139eca9 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s)
__releases(&s->lock)
{
bool wakeup = s->wakeup_needed;
- struct kvm_vcpu *vcpu, *found = NULL;
+ struct kvm_vcpu *vcpu;
int i;
s->wakeup_needed = false;
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s)
if (wakeup) {
kvm_for_each_vcpu(i, vcpu, s->kvm) {
if (kvm_apic_accept_pic_intr(vcpu)) {
- found = vcpu;
- break;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return;
}
}
-
- if (!found)
- return;
-
- kvm_make_request(KVM_REQ_EVENT, found);
- kvm_vcpu_kick(found);
}
}
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
s->output = 0;
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
return intno;
}
-void kvm_pic_reset(struct kvm_kpic_state *s)
+static void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, i;
struct kvm_vcpu *vcpu;
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
return ret;
}
-static u32 pic_ioport_read(void *opaque, u32 addr1)
+static u32 pic_ioport_read(void *opaque, u32 addr)
{
struct kvm_kpic_state *s = opaque;
- unsigned int addr;
int ret;
- addr = addr1;
- addr &= 1;
if (s->poll) {
- ret = pic_poll_read(s, addr1);
+ ret = pic_poll_read(s, addr);
s->poll = 0;
} else
- if (addr == 0)
+ if ((addr & 1) == 0)
if (s->read_reg_select)
ret = s->isr;
else
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
return s->elcr;
}
-static int picdev_in_range(gpa_t addr)
-{
- switch (addr) {
- case 0x20:
- case 0x21:
- case 0xa0:
- case 0xa1:
- case 0x4d0:
- case 0x4d1:
- return 1;
- default:
- return 0;
- }
-}
-
static int picdev_write(struct kvm_pic *s,
gpa_t addr, int len, const void *val)
{
unsigned char data = *(unsigned char *)val;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
if (len != 1) {
pr_pic_unimpl("non byte write\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
+ pic_lock(s);
pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
+ pic_lock(s);
elcr_ioport_write(&s->pics[addr & 1], addr, data);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- pic_unlock(s);
return 0;
}
static int picdev_read(struct kvm_pic *s,
gpa_t addr, int len, void *val)
{
- unsigned char data = 0;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
+ unsigned char *data = (unsigned char *)val;
if (len != 1) {
memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
- data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_lock(s);
+ *data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
- data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_lock(s);
+ *data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- *(unsigned char *)val = data;
- pic_unlock(s);
return 0;
}
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
*/
static void pic_irq_request(struct kvm *kvm, int level)
{
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
if (!s->output)
s->wakeup_needed = true;
@@ -657,9 +637,14 @@ void kvm_pic_destroy(struct kvm *kvm)
{
struct kvm_pic *vpic = kvm->arch.vpic;
+ if (!vpic)
+ return;
+
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vpic = NULL;
kfree(vpic);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6e219e5c07d2..bdff437acbcb 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
spin_unlock(&ioapic->lock);
}
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
- if (!ioapic)
+ if (!ioapic_in_kernel(kvm))
return;
kvm_make_scan_ioapic_request(kvm);
}
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
- kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+ kvm_make_scan_ioapic_request(ioapic->kvm);
break;
}
}
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm)
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
- return ret;
}
- kvm_vcpu_request_scan_ioapic(kvm);
return ret;
}
@@ -635,37 +631,36 @@ void kvm_ioapic_destroy(struct kvm *kvm)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ if (!ioapic)
+ return;
+
cancel_delayed_work_sync(&ioapic->eoi_inject);
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
state->irr &= ~ioapic->irr_delivered;
spin_unlock(&ioapic->lock);
- return 0;
}
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
ioapic->irr_delivered = 0;
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_make_scan_ioapic_request(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
- return 0;
}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54436db..29ce19732ccf 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -105,17 +105,13 @@ do { \
#define ASSERT(x) do { } while (0)
#endif
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vioapic;
-}
-
static inline int ioapic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (ioapic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
deleted file mode 100644
index b181426f67b4..000000000000
--- a/arch/x86/kvm/iommu.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Author: Allen M. Kay <[email protected]>
- * Author: Weidong Han <[email protected]>
- * Author: Ben-Ami Yassour <[email protected]>
- */
-
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/stat.h>
-#include <linux/iommu.h>
-#include "assigned-dev.h"
-
-static bool allow_unsafe_assigned_interrupts;
-module_param_named(allow_unsafe_assigned_interrupts,
- allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
- "Enable device assignment on platforms without interrupt remapping support.");
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages);
-
-static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long npages)
-{
- gfn_t end_gfn;
- kvm_pfn_t pfn;
-
- pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + npages;
- gfn += 1;
-
- if (is_error_noslot_pfn(pfn))
- return pfn;
-
- while (gfn < end_gfn)
- gfn_to_pfn_memslot(slot, gfn++);
-
- return pfn;
-}
-
-static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn,
- unsigned long npages)
-{
- unsigned long i;
-
- for (i = 0; i < npages; ++i)
- kvm_release_pfn_clean(pfn + i);
-}
-
-int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- gfn_t gfn, end_gfn;
- kvm_pfn_t pfn;
- int r = 0;
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int flags;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- gfn = slot->base_gfn;
- end_gfn = gfn + slot->npages;
-
- flags = IOMMU_READ;
- if (!(slot->flags & KVM_MEM_READONLY))
- flags |= IOMMU_WRITE;
- if (!kvm->arch.iommu_noncoherent)
- flags |= IOMMU_CACHE;
-
-
- while (gfn < end_gfn) {
- unsigned long page_size;
-
- /* Check if already mapped */
- if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
- gfn += 1;
- continue;
- }
-
- /* Get the page size we could use to map */
- page_size = kvm_host_page_size(kvm, gfn);
-
- /* Make sure the page_size does not exceed the memslot */
- while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
- page_size >>= 1;
-
- /* Make sure gfn is aligned to the page size we want to map */
- while ((gfn << PAGE_SHIFT) & (page_size - 1))
- page_size >>= 1;
-
- /* Make sure hva is aligned to the page size we want to map */
- while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
- page_size >>= 1;
-
- /*
- * Pin all pages we are about to map in memory. This is
- * important because we unmap and unpin in 4kb steps later.
- */
- pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
- if (is_error_noslot_pfn(pfn)) {
- gfn += 1;
- continue;
- }
-
- /* Map into IO address space */
- r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
- page_size, flags);
- if (r) {
- printk(KERN_ERR "kvm_iommu_map_address:"
- "iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
- goto unmap_pages;
- }
-
- gfn += page_size >> PAGE_SHIFT;
-
- cond_resched();
- }
-
- return 0;
-
-unmap_pages:
- kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
- return r;
-}
-
-static int kvm_iommu_map_memslots(struct kvm *kvm)
-{
- int idx, r = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_register_noncoherent_dma(kvm);
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots) {
- r = kvm_iommu_map_pages(kvm, memslot);
- if (r)
- break;
- }
- srcu_read_unlock(&kvm->srcu, idx);
-
- return r;
-}
-
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int r;
- bool noncoherent;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- r = iommu_attach_device(domain, &pdev->dev);
- if (r) {
- dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
- return r;
- }
-
- noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
-
- /* Check if need to update IOMMU page table for guest memory */
- if (noncoherent != kvm->arch.iommu_noncoherent) {
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_noncoherent = noncoherent;
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- goto out_unmap;
- }
-
- kvm_arch_start_assignment(kvm);
- pci_set_dev_assigned(pdev);
-
- dev_info(&pdev->dev, "kvm assign device\n");
-
- return 0;
-out_unmap:
- kvm_iommu_unmap_memslots(kvm);
- return r;
-}
-
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- iommu_detach_device(domain, &pdev->dev);
-
- pci_clear_dev_assigned(pdev);
- kvm_arch_end_assignment(kvm);
-
- dev_info(&pdev->dev, "kvm deassign device\n");
-
- return 0;
-}
-
-int kvm_iommu_map_guest(struct kvm *kvm)
-{
- int r;
-
- if (!iommu_present(&pci_bus_type)) {
- printk(KERN_ERR "%s: iommu not found\n", __func__);
- return -ENODEV;
- }
-
- mutex_lock(&kvm->slots_lock);
-
- kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
- if (!kvm->arch.iommu_domain) {
- r = -ENOMEM;
- goto out_unlock;
- }
-
- if (!allow_unsafe_assigned_interrupts &&
- !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
- printk(KERN_WARNING "%s: No interrupt remapping support,"
- " disallowing device assignment."
- " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
- " module option.\n", __func__);
- iommu_domain_free(kvm->arch.iommu_domain);
- kvm->arch.iommu_domain = NULL;
- r = -EPERM;
- goto out_unlock;
- }
-
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- kvm_iommu_unmap_memslots(kvm);
-
-out_unlock:
- mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
-{
- struct iommu_domain *domain;
- gfn_t end_gfn, gfn;
- kvm_pfn_t pfn;
- u64 phys;
-
- domain = kvm->arch.iommu_domain;
- end_gfn = base_gfn + npages;
- gfn = base_gfn;
-
- /* check if iommu exists and in use */
- if (!domain)
- return;
-
- while (gfn < end_gfn) {
- unsigned long unmap_pages;
- size_t size;
-
- /* Get physical address */
- phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
-
- if (!phys) {
- gfn++;
- continue;
- }
-
- pfn = phys >> PAGE_SHIFT;
-
- /* Unmap address from IO address space */
- size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
- unmap_pages = 1ULL << get_order(size);
-
- /* Unpin all pages we just unmapped to not leak any memory */
- kvm_unpin_pages(kvm, pfn, unmap_pages);
-
- gfn += unmap_pages;
-
- cond_resched();
- }
-}
-
-void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
-}
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
-{
- int idx;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots)
- kvm_iommu_unmap_pages(kvm, memslot);
-
- srcu_read_unlock(&kvm->srcu, idx);
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_unregister_noncoherent_dma(kvm);
-
- return 0;
-}
-
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- mutex_lock(&kvm->slots_lock);
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_domain = NULL;
- kvm->arch.iommu_noncoherent = false;
- mutex_unlock(&kvm->slots_lock);
-
- iommu_domain_free(domain);
- return 0;
-}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 60d91c9d160c..5c24811e8b0b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
if (irqchip_split(v->kvm))
return pending_userspace_extint(v);
else
- return pic_irqchip(v->kvm)->output;
+ return v->kvm->arch.vpic->output;
} else
return 0;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 40d5b2cf6061..0edd22c3344c 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vpic;
-}
-
static inline int pic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (pic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_split(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_SPLIT;
}
static inline int irqchip_kernel(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE;
+ int mode = kvm->arch.irqchip_mode;
- /* Matches with wmb after initializing kvm->irq_routing. */
+ /* Matches smp_wmb() when setting irqchip_mode */
smp_rmb();
- return ret;
+ return mode > KVM_IRQCHIP_INIT_IN_PROGRESS;
}
-void kvm_pic_reset(struct kvm_kpic_state *s);
-
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 6825cd36d13b..4517a4c2ac3a 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
- struct kvm_pic *pic = pic_irqchip(kvm);
+ struct kvm_pic *pic = kvm->arch.vpic;
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
}
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
- if (!ioapic_in_kernel(kvm))
+ if (!irqchip_kernel(kvm))
goto unlock;
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
- kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+ kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
unlock:
mutex_unlock(&kvm->irq_lock);
}
@@ -278,38 +278,35 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
- int delta;
- unsigned max_pin;
+ /* also allow creation of routes during KVM_IRQCHIP_INIT_IN_PROGRESS */
+ if (kvm->arch.irqchip_mode == KVM_IRQCHIP_NONE)
+ return -EINVAL;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
- delta = 0;
+ if (irqchip_split(kvm))
+ return -EINVAL;
+ e->irqchip.pin = ue->u.irqchip.pin;
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_SLAVE:
- delta = 8;
+ e->irqchip.pin += PIC_NUM_PINS / 2;
/* fall through */
case KVM_IRQCHIP_PIC_MASTER:
- if (!pic_in_kernel(kvm))
- goto out;
-
+ if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
+ return -EINVAL;
e->set = kvm_set_pic_irq;
- max_pin = PIC_NUM_PINS;
break;
case KVM_IRQCHIP_IOAPIC:
- if (!ioapic_in_kernel(kvm))
- goto out;
-
- max_pin = KVM_IOAPIC_NUM_PINS;
+ if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+ return -EINVAL;
e->set = kvm_set_ioapic_irq;
break;
default:
- goto out;
+ return -EINVAL;
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
- e->irqchip.pin = ue->u.irqchip.pin + delta;
- if (e->irqchip.pin >= max_pin)
- goto out;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -318,7 +315,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.data = ue->u.msi.data;
if (kvm_msi_route_invalid(kvm, e))
- goto out;
+ return -EINVAL;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
@@ -326,12 +323,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
default:
- goto out;
+ return -EINVAL;
}
- r = 0;
-out:
- return r;
+ return 0;
}
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac7810513d0e..558676538fca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4340,7 +4340,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool accessed_dirty)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -4349,6 +4350,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->nx = true;
+ context->ept_ad = accessed_dirty;
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_page = ept_sync_page;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e91f2e4..d8ccb32f7308 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -74,7 +74,8 @@ enum {
int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool accessed_dirty);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 37942e419c32..60168cdd0546 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
}
+void kvm_page_track_cleanup(struct kvm *kvm)
+{
+ struct kvm_page_track_notifier_head *head;
+
+ head = &kvm->arch.track_notifier_head;
+ cleanup_srcu_struct(&head->track_srcu);
+}
+
void kvm_page_track_init(struct kvm *kvm)
{
struct kvm_page_track_notifier_head *head;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a01105485315..314d2071b337 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -23,13 +23,6 @@
* so the code in this file is compiled twice, once per pte size.
*/
-/*
- * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
- * uses for EPT without A/D paging type.
- */
-extern u64 __pure __using_nonexistent_pte_bit(void)
- __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
-
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
@@ -39,10 +32,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
- #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
- #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define CMPXCHG cmpxchg
@@ -60,10 +52,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
- #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
- #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
#define CMPXCHG cmpxchg
#elif PTTYPE == PTTYPE_EPT
#define pt_element_t u64
@@ -74,16 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
- #define PT_GUEST_ACCESSED_MASK 0
- #define PT_GUEST_DIRTY_MASK 0
- #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit()
- #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
+ #define PT_GUEST_DIRTY_SHIFT 9
+ #define PT_GUEST_ACCESSED_SHIFT 8
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4
#else
#error Invalid PTTYPE value
#endif
+#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT)
+#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
+
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
@@ -111,12 +104,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
-static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
+static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
+ unsigned gpte)
{
unsigned mask;
/* dirty bit is not supported, so no need to track it */
- if (!PT_GUEST_DIRTY_MASK)
+ if (!PT_HAVE_ACCESSED_DIRTY(mmu))
return;
BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
@@ -171,7 +165,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
goto no_present;
/* if accessed bit is not supported prefetch non accessed gpte */
- if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK))
+ if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
return false;
@@ -217,7 +211,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
int ret;
/* dirty/accessed bits are not supported, so no need to update them */
- if (!PT_GUEST_DIRTY_MASK)
+ if (!PT_HAVE_ACCESSED_DIRTY(mmu))
return 0;
for (level = walker->max_level; level >= walker->level; --level) {
@@ -286,7 +280,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+ unsigned nested_access;
gpa_t pte_gpa;
+ bool have_ad;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault = access & PFERR_USER_MASK;
@@ -299,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
retry_walk:
walker->level = mmu->root_level;
pte = mmu->get_cr3(vcpu);
+ have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
if (walker->level == PT32E_ROOT_LEVEL) {
@@ -312,7 +309,15 @@ retry_walk:
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
- accessed_dirty = PT_GUEST_ACCESSED_MASK;
+ accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
+
+ /*
+ * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
+ * by the MOV to CR instruction are treated as reads and do not cause the
+ * processor to set the dirty flag in any EPT paging-structure entry.
+ */
+ nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
+
pt_access = pte_access = ACC_ALL;
++walker->level;
@@ -332,7 +337,7 @@ retry_walk:
walker->pte_gpa[walker->level - 1] = pte_gpa;
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
- PFERR_USER_MASK|PFERR_WRITE_MASK,
+ nested_access,
&walker->fault);
/*
@@ -394,7 +399,7 @@ retry_walk:
walker->gfn = real_gpa >> PAGE_SHIFT;
if (!write_fault)
- FNAME(protect_clean_gpte)(&pte_access, pte);
+ FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
else
/*
* On a write fault, fold the dirty bit into accessed_dirty.
@@ -485,7 +490,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
- FNAME(protect_clean_gpte)(&pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
if (is_error_pfn(pfn))
@@ -979,7 +984,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(vcpu, gpte);
- FNAME(protect_clean_gpte)(&pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
&nr_present))
@@ -1025,3 +1030,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef PT_GUEST_DIRTY_MASK
#undef PT_GUEST_DIRTY_SHIFT
#undef PT_GUEST_ACCESSED_SHIFT
+#undef PT_HAVE_ACCESSED_DIRTY
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1efe2c62b3f..1b203abf76e1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm)
unsigned long flags;
struct kvm_arch *vm_data = &kvm->arch;
+ if (!avic)
+ return;
+
avic_free_vm_id(vm_data->avic_vm_id);
if (vm_data->avic_logical_id_table_page)
@@ -5253,6 +5256,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}
+static void svm_setup_mce(struct kvm_vcpu *vcpu)
+{
+ /* [63:9] are reserved. */
+ vcpu->arch.mcg_cap &= 0x1ff;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5364,6 +5373,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
.update_pi_irte = svm_update_pi_irte,
+ .setup_mce = svm_setup_mce,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa8601833..c1a12b94e1fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
@@ -615,10 +612,6 @@ struct vcpu_vmx {
int vpid;
bool emulation_required;
- /* Support for vnmi-less CPUs */
- int soft_vnmi_blocked;
- ktime_t entry_time;
- s64 vnmi_blocked_time;
u32 exit_reason;
/* Posted interrupt descriptor */
@@ -914,8 +907,6 @@ static void nested_release_page_clean(struct page *page)
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -1239,6 +1230,11 @@ static inline bool cpu_has_vmx_invvpid_global(void)
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}
+static inline bool cpu_has_vmx_invvpid(void)
+{
+ return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
+}
+
static inline bool cpu_has_vmx_ept(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1285,11 +1281,6 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
-static inline bool cpu_has_virtual_nmis(void)
-{
- return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -2235,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
- else if (!already_loaded)
- loaded_vmcs_clear(vmx->loaded_vmcs);
-
if (!already_loaded) {
+ loaded_vmcs_clear(vmx->loaded_vmcs);
local_irq_disable();
crash_disable_local_vmclear(cpu);
@@ -2321,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_vcpu_pi_put(vcpu);
__vmx_load_host_state(to_vmx(vcpu));
- if (!vmm_exclusive) {
- __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
- vcpu->cpu = -1;
- kvm_cpu_vmxoff();
- }
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -2749,11 +2730,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high);
vmx->nested.nested_vmx_secondary_ctls_low = 0;
vmx->nested.nested_vmx_secondary_ctls_high &=
+ SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
@@ -2764,14 +2745,16 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
- VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
- VMX_EPT_INVEPT_BIT;
+ VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only())
vmx->nested.nested_vmx_ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
- VMX_EPT_EXTENT_CONTEXT_BIT;
+ VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
+ VMX_EPT_1GB_PAGE_BIT;
+ if (enable_ept_ad_bits)
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -2781,10 +2764,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* though it is treated as global context. The alternative is
* not failing the single-context invvpid, and it is worse.
*/
- if (enable_vpid)
+ if (enable_vpid) {
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_VPID;
vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
VMX_VPID_EXTENT_SUPPORTED_MASK;
- else
+ } else
vmx->nested.nested_vmx_vpid_caps = 0;
if (enable_unrestricted_guest)
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
}
- cr4_set_bits(X86_CR4_VMXE);
-
- if (vmm_exclusive) {
- kvm_cpu_vmxon(phys_addr);
- ept_sync_global();
- }
+ kvm_cpu_vmxon(phys_addr);
+ ept_sync_global();
native_store_gdt(this_cpu_ptr(&host_gdt));
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
intel_pt_handle_vmx(0);
+ cr4_clear_bits(X86_CR4_VMXE);
}
static void hardware_disable(void)
{
- if (vmm_exclusive) {
- vmclear_local_loaded_vmcss();
- kvm_cpu_vmxoff();
- }
- cr4_clear_bits(X86_CR4_VMXE);
+ vmclear_local_loaded_vmcss();
+ kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -3617,9 +3597,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+ PIN_BASED_VIRTUAL_NMIS;
+ opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -4011,11 +3991,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
{
- vpid_sync_context(vpid);
if (enable_ept) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+ } else {
+ vpid_sync_context(vpid);
}
}
@@ -4024,6 +4005,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
}
+static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
+{
+ if (enable_ept)
+ vmx_flush_tlb(vcpu);
+}
+
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -5285,8 +5272,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
- vmx->soft_vnmi_blocked = 0;
-
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -5406,8 +5391,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (!cpu_has_virtual_nmis() ||
- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5448,19 +5432,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!is_guest_mode(vcpu)) {
- if (!cpu_has_virtual_nmis()) {
- /*
- * Tracking the NMI-blocked state in software is built upon
- * finding the next open IRQ window. This, in turn, depends on
- * well-behaving guests: They have to keep IRQs disabled at
- * least as long as the NMI handler runs. Otherwise we may
- * cause NMI nesting, maybe breaking the guest. But as this is
- * highly unlikely, we can live with the residual risk.
- */
- vmx->soft_vnmi_blocked = 1;
- vmx->vnmi_blocked_time = 0;
- }
-
++vcpu->stat.nmi_injections;
vmx->nmi_known_unmasked = false;
}
@@ -5477,8 +5448,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
- if (!cpu_has_virtual_nmis())
- return to_vmx(vcpu)->soft_vnmi_blocked;
if (to_vmx(vcpu)->nmi_known_unmasked)
return false;
return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5488,20 +5457,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!cpu_has_virtual_nmis()) {
- if (vmx->soft_vnmi_blocked != masked) {
- vmx->soft_vnmi_blocked = masked;
- vmx->vnmi_blocked_time = 0;
- }
- } else {
- vmx->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- }
+ vmx->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5509,9 +5471,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
- if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
- return 0;
-
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6232,21 +6191,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
unsigned long exit_qualification;
gpa_t gpa;
u32 error_code;
- int gla_validity;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- gla_validity = (exit_qualification >> 7) & 0x3;
- if (gla_validity == 0x2) {
- printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
- printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
- (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
- vmcs_readl(GUEST_LINEAR_ADDRESS));
- printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
- (long unsigned int)exit_qualification);
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
- return 0;
+ if (is_guest_mode(vcpu)
+ && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
+ /*
+ * Fix up exit_qualification according to whether guest
+ * page table accesses are reads or writes.
+ */
+ u64 eptp = nested_ept_get_cr3(vcpu);
+ if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
+ exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
}
/*
@@ -6256,7 +6212,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6517,8 +6472,10 @@ static __init int hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
- if (!cpu_has_vmx_vpid())
+ if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
+ !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs)
@@ -7258,9 +7215,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 zero = 0;
gpa_t vmptr;
- struct vmcs12 *vmcs12;
- struct page *page;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -7271,22 +7227,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- /*
- * For accurate processor emulation, VMCLEAR beyond available
- * physical memory should do nothing at all. However, it is
- * possible that a nested vmx bug, not a guest hypervisor bug,
- * resulted in this case, so let's shut down before doing any
- * more damage:
- */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- return 1;
- }
- vmcs12 = kmap(page);
- vmcs12->launch_state = 0;
- kunmap(page);
- nested_release_page(page);
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12, launch_state),
+ &zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
@@ -7819,7 +7762,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8121,6 +8063,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+ case EXIT_REASON_RDRAND:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
+ case EXIT_REASON_RDSEED:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@ -8491,31 +8437,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
- !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
- get_vmcs12(vcpu))))) {
- if (vmx_interrupt_allowed(vcpu)) {
- vmx->soft_vnmi_blocked = 0;
- } else if (vmx->vnmi_blocked_time > 1000000000LL &&
- vcpu->arch.nmi_pending) {
- /*
- * This CPU don't support us in finding the end of an
- * NMI-blocked window if the guest runs with IRQs
- * disabled. So we pull the trigger after 1 s of
- * futile waiting, but inform the user about this.
- */
- printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
- "state on VCPU %d after 1 s timeout\n",
- __func__, vcpu->vcpu_id);
- vmx->soft_vnmi_blocked = 0;
- }
- }
-
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
+ exit_reason);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -8561,6 +8488,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
} else {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmx_flush_tlb_ept_only(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
@@ -8586,8 +8514,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
*/
if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(&vmx->vcpu),
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
vmcs_write64(APIC_ACCESS_ADDR, hpa);
+ vmx_flush_tlb_ept_only(vcpu);
+ }
}
static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -8782,37 +8712,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (cpu_has_virtual_nmis()) {
- if (vmx->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
- } else if (unlikely(vmx->soft_vnmi_blocked))
- vmx->vnmi_blocked_time +=
- ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+ if (vmx->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8929,10 +8855,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr4;
- /* Record the guest's net vcpu time for enforced NMI injections. */
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
- vmx->entry_time = ktime_get();
-
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
@@ -9140,16 +9062,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_complete_interrupts(vmx);
}
-static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
- if (vmx->loaded_vmcs == &vmx->vmcs01)
+ if (vmx->loaded_vmcs == vmcs)
return;
cpu = get_cpu();
- vmx->loaded_vmcs = &vmx->vmcs01;
+ vmx->loaded_vmcs = vmcs;
vmx_vcpu_put(vcpu);
vmx_vcpu_load(vcpu, cpu);
vcpu->cpu = cpu;
@@ -9167,7 +9089,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
BUG_ON(r);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
free_nested(vmx);
vcpu_put(vcpu);
}
@@ -9228,11 +9150,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
goto free_msrs;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
loaded_vmcs_init(vmx->loaded_vmcs);
- if (!vmm_exclusive)
- kvm_cpu_vmxoff();
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -9492,17 +9410,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
return get_vmcs12(vcpu)->ept_pointer;
}
-static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
+ u64 eptp;
+
WARN_ON(mmu_is_nested(vcpu));
+ eptp = nested_ept_get_cr3(vcpu);
+ if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+ return 1;
+
+ kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.nested_vmx_ept_caps &
- VMX_EPT_EXECUTE_ONLY_BIT);
+ VMX_EPT_EXECUTE_ONLY_BIT,
+ eptp & VMX_EPT_AD_ENABLE_BIT);
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+ return 0;
}
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -9694,10 +9621,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
return false;
page = nested_get_page(vcpu, vmcs12->msr_bitmap);
- if (!page) {
- WARN_ON(1);
+ if (!page)
return false;
- }
msr_bitmap_l1 = (unsigned long *)kmap(page);
memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@@ -9990,7 +9915,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- bool nested_ept_enabled = false;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10137,8 +10061,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_intr_status);
}
- nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
-
/*
* Write an illegal value to APIC_ACCESS_ADDR. Later,
* nested_get_vmcs12_pages will either fix it up or
@@ -10269,8 +10191,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if (nested_cpu_has_ept(vmcs12)) {
- kvm_mmu_unload(vcpu);
- nested_ept_init_mmu_context(vcpu);
+ if (nested_ept_init_mmu_context(vcpu)) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
+ }
+ } else if (nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ vmx_flush_tlb_ept_only(vcpu);
}
/*
@@ -10298,12 +10225,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmx_set_efer(vcpu, vcpu->arch.efer);
/* Shadow page tables on either EPT or shadow page tables. */
- if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
return 1;
- kvm_mmu_reset_context(vcpu);
-
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
@@ -10423,7 +10348,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct loaded_vmcs *vmcs02;
- int cpu;
u32 msr_entry_idx;
u32 exit_qual;
@@ -10436,18 +10360,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- cpu = get_cpu();
- vmx->loaded_vmcs = vmcs02;
- vmx_vcpu_put(vcpu);
- vmx_vcpu_load(vcpu, cpu);
- vcpu->cpu = cpu;
- put_cpu();
-
+ vmx_switch_vmcs(vcpu, vmcs02);
vmx_segment_cache_clear(vmx);
if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, exit_qual);
return 1;
@@ -10460,7 +10378,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
vmcs12->vm_entry_msr_load_count);
if (msr_entry_idx) {
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
return 1;
@@ -11028,7 +10946,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (unlikely(vmx->fail))
vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
&& nested_exit_intr_ack_set(vcpu)) {
@@ -11072,6 +10990,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
vmx_set_virtual_x2apic_mode(vcpu,
vcpu->arch.apic_base & X2APIC_ENABLE);
+ } else if (!nested_cpu_has_ept(vmcs12) &&
+ nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ vmx_flush_tlb_ept_only(vcpu);
}
/* This is needed for same reason as it was needed in prepare_vmcs02 */
@@ -11121,8 +11043,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
static void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
+ }
free_nested(to_vmx(vcpu));
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1faf620a6fdc..34bf64fb4dea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,7 +27,6 @@
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
-#include "assigned-dev.h"
#include "pmu.h"
#include "hyperv.h"
@@ -1444,10 +1443,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 usdiff;
bool matched;
bool already_matched;
u64 data = msr->data;
+ bool synchronizing = false;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1455,51 +1454,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
- int faulted = 0;
-
- /* n.b - signed multiplication and division required */
- usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
- usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
- /* do_div() only does unsigned */
- asm("1: idivl %[divisor]\n"
- "2: xor %%edx, %%edx\n"
- " movl $0, %[faulted]\n"
- "3:\n"
- ".section .fixup,\"ax\"\n"
- "4: movl $1, %[faulted]\n"
- " jmp 3b\n"
- ".previous\n"
-
- _ASM_EXTABLE(1b, 4b)
-
- : "=A"(usdiff), [faulted] "=r" (faulted)
- : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
- do_div(elapsed, 1000);
- usdiff -= elapsed;
- if (usdiff < 0)
- usdiff = -usdiff;
-
- /* idivl overflow => difference is larger than USEC_PER_SEC */
- if (faulted)
- usdiff = USEC_PER_SEC;
- } else
- usdiff = USEC_PER_SEC; /* disable TSC match window below */
+ if (data == 0 && msr->host_initiated) {
+ /*
+ * detection of vcpu initialization -- need to sync
+ * with other vCPUs. This particularly helps to keep
+ * kvm_clock stable after CPU hotplug
+ */
+ synchronizing = true;
+ } else {
+ u64 tsc_exp = kvm->arch.last_tsc_write +
+ nsec_to_cycles(vcpu, elapsed);
+ u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
+ /*
+ * Special case: TSC write with a small delta (1 second)
+ * of virtual cycle time against real time is
+ * interpreted as an attempt to synchronize the CPU.
+ */
+ synchronizing = data < tsc_exp + tsc_hz &&
+ data + tsc_hz > tsc_exp;
+ }
+ }
/*
- * Special case: TSC write with a small delta (1 second) of virtual
- * cycle time against real time is interpreted as an attempt to
- * synchronize the CPU.
- *
* For a reliable TSC, we can match TSC offsets, and for an unstable
* TSC, we add elapsed time in this computation. We could let the
* compensation code attempt to catch up if we fall behind, but
* it's better to try to match offsets from the beginning.
*/
- if (usdiff < USEC_PER_SEC &&
+ if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
@@ -2155,6 +2137,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
case MSR_AMD64_BU_CFG2:
+ case MSR_AMD64_DC_CFG:
break;
case MSR_EFER:
@@ -2417,6 +2400,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
case MSR_IA32_PERF_CTL:
+ case MSR_AMD64_DC_CFG:
msr_info->data = 0;
break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2675,10 +2659,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_ASSIGN_DEV_IRQ:
- case KVM_CAP_PCI_2_3:
-#endif
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
@@ -2695,9 +2675,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
@@ -2713,11 +2690,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_IOMMU:
- r = iommu_present(&pci_bus_type);
- break;
-#endif
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
@@ -3124,7 +3096,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
if (events->exception.injected &&
- (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
+ is_guest_mode(vcpu)))
+ return -EINVAL;
+
+ /* INITs are latched while in SMM */
+ if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
+ (events->smi.smm || events->smi.pending) &&
+ vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
return -EINVAL;
process_nmi(vcpu);
@@ -3721,22 +3700,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[0],
+ memcpy(&chip->chip.pic, &pic->pics[0],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_PIC_SLAVE:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[1],
+ memcpy(&chip->chip.pic, &pic->pics[1],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+ kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@@ -3747,32 +3725,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[0],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[0], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_PIC_SLAVE:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[1],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[1], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+ kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
break;
}
- kvm_pic_update_irq(pic_irqchip(kvm));
+ kvm_pic_update_irq(pic);
return r;
}
@@ -3934,9 +3911,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
goto split_irqchip_unlock;
if (kvm->created_vcpus)
goto split_irqchip_unlock;
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
r = kvm_setup_empty_irq_routing(kvm);
- if (r)
+ if (r) {
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+ /* Pairs with smp_rmb() when reading irqchip_mode */
+ smp_wmb();
goto split_irqchip_unlock;
+ }
/* Pairs with irqchip_in_kernel. */
smp_wmb();
kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
@@ -4018,20 +4000,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_ioapic_init(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
- mutex_lock(&kvm->irq_lock);
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+ /* Pairs with smp_rmb() when reading irqchip_mode */
+ smp_wmb();
kvm_ioapic_destroy(kvm);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->irq_lock);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
@@ -4230,7 +4210,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
default:
- r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
+ r = -ENOTTY;
}
out:
return r;
@@ -7355,6 +7335,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
return -EINVAL;
+ /* INITs are latched while in SMM */
+ if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+ (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
+ mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
+ return -EINVAL;
+
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
@@ -8068,7 +8054,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
{
cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
- kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
}
@@ -8152,12 +8137,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
if (kvm_x86_ops->vm_destroy)
kvm_x86_ops->vm_destroy(kvm);
- kvm_iommu_unmap_guest(kvm);
- kfree(kvm->arch.vpic);
- kfree(kvm->arch.vioapic);
+ kvm_pic_destroy(kvm);
+ kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kvm_mmu_uninit_vm(kvm);
+ kvm_page_track_cleanup(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -8566,11 +8551,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
{
struct x86_exception fault;
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8d63d7a104c3..4c90cfdc128b 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,3 +1,4 @@
+#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
#include <linux/bootmem.h>
#include <linux/kasan.h>
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 5126dfd52b18..cd44ae727df7 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
* we might run off the end of the bounds table if we are on
* a 64-bit kernel and try to get 8 bytes.
*/
-int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
long __user *bd_entry_ptr)
{
u32 bd_entry_32;
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index a7dbec4dce27..3dbde04febdc 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
# MISC Devices
obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o
obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o
obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
new file mode 100644
index 000000000000..a6c3705a28ad
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
@@ -0,0 +1,82 @@
+/*
+ * Intel Merrifield power button support
+ *
+ * (C) Copyright 2017 Intel Corporation
+ *
+ * Author: Andy Shevchenko <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+
+static struct resource mrfld_power_btn_resources[] = {
+ {
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device mrfld_power_btn_dev = {
+ .name = "msic_power_btn",
+ .id = PLATFORM_DEVID_NONE,
+ .num_resources = ARRAY_SIZE(mrfld_power_btn_resources),
+ .resource = mrfld_power_btn_resources,
+};
+
+static int mrfld_power_btn_scu_status_change(struct notifier_block *nb,
+ unsigned long code, void *data)
+{
+ if (code == SCU_DOWN) {
+ platform_device_unregister(&mrfld_power_btn_dev);
+ return 0;
+ }
+
+ return platform_device_register(&mrfld_power_btn_dev);
+}
+
+static struct notifier_block mrfld_power_btn_scu_notifier = {
+ .notifier_call = mrfld_power_btn_scu_status_change,
+};
+
+static int __init register_mrfld_power_btn(void)
+{
+ if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+ return -ENODEV;
+
+ /*
+ * We need to be sure that the SCU IPC is ready before
+ * PMIC power button device can be registered:
+ */
+ intel_scu_notifier_add(&mrfld_power_btn_scu_notifier);
+
+ return 0;
+}
+arch_initcall(register_mrfld_power_btn);
+
+static void __init *mrfld_power_btn_platform_data(void *info)
+{
+ struct resource *res = mrfld_power_btn_resources;
+ struct sfi_device_table_entry *pentry = info;
+
+ res->start = res->end = pentry->irq;
+ return NULL;
+}
+
+static const struct devs_id mrfld_power_btn_dev_id __initconst = {
+ .name = "bcove_power_btn",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .msic = 1,
+ .get_platform_data = &mrfld_power_btn_platform_data,
+};
+
+sfi_device(mrfld_power_btn_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 86edd1e941eb..9e304e2ea4f5 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -19,7 +19,7 @@
#include <asm/intel_scu_ipc.h>
#include <asm/io_apic.h>
-#define TANGIER_EXT_TIMER0_MSI 15
+#define TANGIER_EXT_TIMER0_MSI 12
static struct platform_device wdt_dev = {
.name = "intel_mid_wdt",
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index e793fe509971..e42978d4deaf 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -17,16 +17,6 @@
#include "intel_mid_weak_decls.h"
-static void penwell_arch_setup(void);
-/* penwell arch ops */
-static struct intel_mid_ops penwell_ops = {
- .arch_setup = penwell_arch_setup,
-};
-
-static void mfld_power_off(void)
-{
-}
-
static unsigned long __init mfld_calibrate_tsc(void)
{
unsigned long fast_calibrate;
@@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void)
static void __init penwell_arch_setup(void)
{
x86_platform.calibrate_tsc = mfld_calibrate_tsc;
- pm_power_off = mfld_power_off;
}
+static struct intel_mid_ops penwell_ops = {
+ .arch_setup = penwell_arch_setup,
+};
+
void *get_penwell_ops(void)
{
return &penwell_ops;
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index b6d5c8946e66..470edad96bb9 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -10,22 +10,19 @@
* Version 2. See the file COPYING for more details.
*/
+#include <linux/bug.h>
+#include <asm/purgatory.h>
+
#include "sha256.h"
-#include "purgatory.h"
#include "../boot/string.h"
-struct sha_region {
- unsigned long start;
- unsigned long len;
-};
-
-static unsigned long backup_dest;
-static unsigned long backup_src;
-static unsigned long backup_sz;
+unsigned long purgatory_backup_dest __section(.kexec-purgatory);
+unsigned long purgatory_backup_src __section(.kexec-purgatory);
+unsigned long purgatory_backup_sz __section(.kexec-purgatory);
-static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
-struct sha_region sha_regions[16] = {};
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
/*
* On x86, second kernel requries first 640K of memory to boot. Copy
@@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {};
*/
static int copy_backup_region(void)
{
- if (backup_dest)
- memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
-
+ if (purgatory_backup_dest) {
+ memcpy((void *)purgatory_backup_dest,
+ (void *)purgatory_backup_src, purgatory_backup_sz);
+ }
return 0;
}
static int verify_sha256_digest(void)
{
- struct sha_region *ptr, *end;
+ struct kexec_sha_region *ptr, *end;
u8 digest[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
sha256_init(&sctx);
- end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
- for (ptr = sha_regions; ptr < end; ptr++)
+ end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+ for (ptr = purgatory_sha_regions; ptr < end; ptr++)
sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
sha256_final(&sctx, digest);
- if (memcmp(digest, sha256_digest, sizeof(digest)))
+ if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
return 1;
return 0;
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
deleted file mode 100644
index e2e365a6c192..000000000000
--- a/arch/x86/purgatory/purgatory.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef PURGATORY_H
-#define PURGATORY_H
-
-#ifndef __ASSEMBLY__
-extern void purgatory(void);
-#endif /* __ASSEMBLY__ */
-
-#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index f90e9dfa90bb..dfae9b9e60b5 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,7 +9,7 @@
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
-#include "purgatory.h"
+#include <asm/purgatory.h>
.text
.globl purgatory_start
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
index bd15a4127735..2867d9825a57 100644
--- a/arch/x86/purgatory/sha256.h
+++ b/arch/x86/purgatory/sha256.h
@@ -10,7 +10,6 @@
#ifndef SHA256_H
#define SHA256_H
-
#include <linux/types.h>
#include <crypto/sha.h>
diff --git a/block/bio.c b/block/bio.c
index 5eec5e08417f..e75878f8b14a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
bio_list_init(&punt);
bio_list_init(&nopunt);
- while ((bio = bio_list_pop(current->bio_list)))
+ while ((bio = bio_list_pop(&current->bio_list[0])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_list[0] = nopunt;
- *current->bio_list = nopunt;
+ bio_list_init(&nopunt);
+ while ((bio = bio_list_pop(&current->bio_list[1])))
+ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_list[1] = nopunt;
spin_lock(&bs->rescue_lock);
bio_list_merge(&bs->rescue_list, &punt);
@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
* we retry with the original gfp_flags.
*/
- if (current->bio_list && !bio_list_empty(current->bio_list))
+ if (current->bio_list &&
+ (!bio_list_empty(&current->bio_list[0]) ||
+ !bio_list_empty(&current->bio_list[1])))
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
diff --git a/block/blk-core.c b/block/blk-core.c
index 0eeb99ef654f..d772c221cc17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1973,7 +1973,14 @@ end_io:
*/
blk_qc_t generic_make_request(struct bio *bio)
{
- struct bio_list bio_list_on_stack;
+ /*
+ * bio_list_on_stack[0] contains bios submitted by the current
+ * make_request_fn.
+ * bio_list_on_stack[1] contains bios that were submitted before
+ * the current make_request_fn, but that haven't been processed
+ * yet.
+ */
+ struct bio_list bio_list_on_stack[2];
blk_qc_t ret = BLK_QC_T_NONE;
if (!generic_make_request_checks(bio))
@@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio)
* should be added at the tail
*/
if (current->bio_list) {
- bio_list_add(current->bio_list, bio);
+ bio_list_add(&current->bio_list[0], bio);
goto out;
}
@@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio)
* bio_list, and call into ->make_request() again.
*/
BUG_ON(bio->bi_next);
- bio_list_init(&bio_list_on_stack);
- current->bio_list = &bio_list_on_stack;
+ bio_list_init(&bio_list_on_stack[0]);
+ current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, false) == 0)) {
- struct bio_list hold;
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
- hold = bio_list_on_stack;
- bio_list_init(&bio_list_on_stack);
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
ret = q->make_request_fn(q, bio);
blk_queue_exit(q);
@@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio)
*/
bio_list_init(&lower);
bio_list_init(&same);
- while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
/* now assemble so we handle the lowest level first */
- bio_list_merge(&bio_list_on_stack, &lower);
- bio_list_merge(&bio_list_on_stack, &same);
- bio_list_merge(&bio_list_on_stack, &hold);
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} else {
bio_io_error(bio);
}
- bio = bio_list_pop(current->bio_list);
+ bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
current->bio_list = NULL; /* deactivate */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e48bc2c72615..9d97bfc4d465 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
for (i = 0; i < set->nr_hw_queues; i++) {
struct blk_mq_tags *tags = set->tags[i];
+ if (!tags)
+ continue;
+
for (j = 0; j < tags->nr_tags; j++) {
if (!tags->static_rqs[j])
continue;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 159187a28d66..a4546f060e80 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
}
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
+static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+ bool may_sleep)
{
struct request_queue *q = rq->q;
struct blk_mq_queue_data bd = {
@@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
}
insert:
- blk_mq_sched_insert_request(rq, false, true, true, false);
+ blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
}
/*
@@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
- blk_mq_try_issue_directly(old_rq, &cookie);
+ blk_mq_try_issue_directly(old_rq, &cookie, false);
rcu_read_unlock();
} else {
srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
- blk_mq_try_issue_directly(old_rq, &cookie);
+ blk_mq_try_issue_directly(old_rq, &cookie, true);
srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
}
goto done;
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index f5e18c2a4852..690deca17c35 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -266,7 +266,7 @@ unlock:
return err;
}
-int af_alg_accept(struct sock *sk, struct socket *newsock)
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
{
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
@@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
if (!type)
goto unlock;
- sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
+ sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern);
err = -ENOMEM;
if (!sk2)
goto unlock;
@@ -323,9 +323,10 @@ unlock:
}
EXPORT_SYMBOL_GPL(af_alg_accept);
-static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
+static int alg_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
- return af_alg_accept(sock->sk, newsock);
+ return af_alg_accept(sock->sk, newsock, kern);
}
static const struct proto_ops alg_proto_ops = {
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 54fc90e8339c..5e92bd275ef3 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -239,7 +239,8 @@ unlock:
return err ?: len;
}
-static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
+static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
@@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
if (err)
return err;
- err = af_alg_accept(ask->parent, newsock);
+ err = af_alg_accept(ask->parent, newsock, kern);
if (err)
return err;
@@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
}
static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
int err;
@@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
if (err)
return err;
- return hash_accept(sock, newsock, flags);
+ return hash_accept(sock, newsock, flags, kern);
}
static struct proto_ops algif_hash_ops_nokey = {
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 4467a8089ab8..0143135b3abe 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu)
void __weak arch_unregister_cpu(int cpu) {}
-int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
-{
- return -ENODEV;
-}
-
static int acpi_processor_hotadd_init(struct acpi_processor *pr)
{
unsigned long long sta;
@@ -285,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device)
pr->acpi_id = value;
}
+ if (acpi_duplicate_processor_id(pr->acpi_id)) {
+ dev_err(&device->dev,
+ "Failed to get unique processor _UID (0x%x)\n",
+ pr->acpi_id);
+ return -ENODEV;
+ }
+
pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration,
pr->acpi_id);
if (invalid_phys_cpuid(pr->phys_id))
@@ -585,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = {
static int nr_unique_ids __initdata;
/* The number of the duplicate processor IDs */
-static int nr_duplicate_ids __initdata;
+static int nr_duplicate_ids;
/* Used to store the unique processor IDs */
static int unique_processor_ids[] __initdata = {
@@ -593,7 +595,7 @@ static int unique_processor_ids[] __initdata = {
};
/* Used to store the duplicate processor IDs */
-static int duplicate_processor_ids[] __initdata = {
+static int duplicate_processor_ids[] = {
[0 ... NR_CPUS - 1] = -1,
};
@@ -638,28 +640,53 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle,
void **rv)
{
acpi_status status;
+ acpi_object_type acpi_type;
+ unsigned long long uid;
union acpi_object object = { 0 };
struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
- status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+ status = acpi_get_type(handle, &acpi_type);
if (ACPI_FAILURE(status))
- acpi_handle_info(handle, "Not get the processor object\n");
- else
- processor_validated_ids_update(object.processor.proc_id);
+ return false;
+
+ switch (acpi_type) {
+ case ACPI_TYPE_PROCESSOR:
+ status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ goto err;
+ uid = object.processor.proc_id;
+ break;
+
+ case ACPI_TYPE_DEVICE:
+ status = acpi_evaluate_integer(handle, "_UID", NULL, &uid);
+ if (ACPI_FAILURE(status))
+ goto err;
+ break;
+ default:
+ goto err;
+ }
+
+ processor_validated_ids_update(uid);
+ return true;
+
+err:
+ acpi_handle_info(handle, "Invalid processor object\n");
+ return false;
- return AE_OK;
}
-static void __init acpi_processor_check_duplicates(void)
+void __init acpi_processor_check_duplicates(void)
{
- /* Search all processor nodes in ACPI namespace */
+ /* check the correctness for all processors in ACPI namespace */
acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
ACPI_UINT32_MAX,
acpi_processor_ids_walk,
NULL, NULL, NULL);
+ acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk,
+ NULL, NULL);
}
-bool __init acpi_processor_validate_proc_id(int proc_id)
+bool acpi_duplicate_processor_id(int proc_id)
{
int i;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 80cb5eb75b63..34fbe027e73a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1249,7 +1249,6 @@ static int __init acpi_init(void)
acpi_wakeup_device_init();
acpi_debugger_init();
acpi_setup_sb_notify_handler();
- acpi_set_processor_mapping();
return 0;
}
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 611a5585a902..b933061b6b60 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void)
}
static int map_lapic_id(struct acpi_subtable_header *entry,
- u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled)
+ u32 acpi_id, phys_cpuid_t *apic_id)
{
struct acpi_madt_local_apic *lapic =
container_of(entry, struct acpi_madt_local_apic, header);
- if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED))
+ if (!(lapic->lapic_flags & ACPI_MADT_ENABLED))
return -ENODEV;
if (lapic->processor_id != acpi_id)
@@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry,
}
static int map_x2apic_id(struct acpi_subtable_header *entry,
- int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
- bool ignore_disabled)
+ int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
{
struct acpi_madt_local_x2apic *apic =
container_of(entry, struct acpi_madt_local_x2apic, header);
- if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED))
+ if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
return -ENODEV;
if (device_declaration && (apic->uid == acpi_id)) {
@@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry,
}
static int map_lsapic_id(struct acpi_subtable_header *entry,
- int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
- bool ignore_disabled)
+ int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
{
struct acpi_madt_local_sapic *lsapic =
container_of(entry, struct acpi_madt_local_sapic, header);
- if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED))
+ if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
return -ENODEV;
if (device_declaration) {
@@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
* Retrieve the ARM CPU physical identifier (MPIDR)
*/
static int map_gicc_mpidr(struct acpi_subtable_header *entry,
- int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr,
- bool ignore_disabled)
+ int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
{
struct acpi_madt_generic_interrupt *gicc =
container_of(entry, struct acpi_madt_generic_interrupt, header);
- if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED))
+ if (!(gicc->flags & ACPI_MADT_ENABLED))
return -ENODEV;
/* device_declaration means Device object in DSDT, in the
@@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
}
static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
- int type, u32 acpi_id, bool ignore_disabled)
+ int type, u32 acpi_id)
{
unsigned long madt_end, entry;
phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */
@@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
struct acpi_subtable_header *header =
(struct acpi_subtable_header *)entry;
if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
- if (!map_lapic_id(header, acpi_id, &phys_id,
- ignore_disabled))
+ if (!map_lapic_id(header, acpi_id, &phys_id))
break;
} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
- if (!map_x2apic_id(header, type, acpi_id, &phys_id,
- ignore_disabled))
+ if (!map_x2apic_id(header, type, acpi_id, &phys_id))
break;
} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
- if (!map_lsapic_id(header, type, acpi_id, &phys_id,
- ignore_disabled))
+ if (!map_lsapic_id(header, type, acpi_id, &phys_id))
break;
} else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
- if (!map_gicc_mpidr(header, type, acpi_id, &phys_id,
- ignore_disabled))
+ if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
break;
}
entry += header->length;
@@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id)
if (!madt)
return PHYS_CPUID_INVALID;
- rv = map_madt_entry(madt, 1, acpi_id, true);
+ rv = map_madt_entry(madt, 1, acpi_id);
acpi_put_table((struct acpi_table_header *)madt);
return rv;
}
-static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
- bool ignore_disabled)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
{
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *obj;
@@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
header = (struct acpi_subtable_header *)obj->buffer.pointer;
if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
- map_lapic_id(header, acpi_id, &phys_id, ignore_disabled);
+ map_lapic_id(header, acpi_id, &phys_id);
else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
- map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+ map_lsapic_id(header, type, acpi_id, &phys_id);
else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
- map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+ map_x2apic_id(header, type, acpi_id, &phys_id);
else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
- map_gicc_mpidr(header, type, acpi_id, &phys_id,
- ignore_disabled);
+ map_gicc_mpidr(header, type, acpi_id, &phys_id);
exit:
kfree(buffer.pointer);
return phys_id;
}
-static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type,
- u32 acpi_id, bool ignore_disabled)
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
{
phys_cpuid_t phys_id;
- phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled);
+ phys_id = map_mat_entry(handle, type, acpi_id);
if (invalid_phys_cpuid(phys_id))
- phys_id = map_madt_entry(get_madt_table(), type, acpi_id,
- ignore_disabled);
+ phys_id = map_madt_entry(get_madt_table(), type, acpi_id);
return phys_id;
}
-phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
-{
- return __acpi_get_phys_id(handle, type, acpi_id, true);
-}
-
int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
{
#ifdef CONFIG_SMP
@@ -278,79 +262,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
}
EXPORT_SYMBOL_GPL(acpi_get_cpuid);
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static bool __init
-map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
-{
- int type, id;
- u32 acpi_id;
- acpi_status status;
- acpi_object_type acpi_type;
- unsigned long long tmp;
- union acpi_object object = { 0 };
- struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
- status = acpi_get_type(handle, &acpi_type);
- if (ACPI_FAILURE(status))
- return false;
-
- switch (acpi_type) {
- case ACPI_TYPE_PROCESSOR:
- status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
- if (ACPI_FAILURE(status))
- return false;
- acpi_id = object.processor.proc_id;
-
- /* validate the acpi_id */
- if(acpi_processor_validate_proc_id(acpi_id))
- return false;
- break;
- case ACPI_TYPE_DEVICE:
- status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
- if (ACPI_FAILURE(status))
- return false;
- acpi_id = tmp;
- break;
- default:
- return false;
- }
-
- type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-
- *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
- id = acpi_map_cpuid(*phys_id, acpi_id);
-
- if (id < 0)
- return false;
- *cpuid = id;
- return true;
-}
-
-static acpi_status __init
-set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
- void **rv)
-{
- phys_cpuid_t phys_id;
- int cpu_id;
-
- if (!map_processor(handle, &phys_id, &cpu_id))
- return AE_ERROR;
-
- acpi_map_cpu2node(handle, cpu_id, phys_id);
- return AE_OK;
-}
-
-void __init acpi_set_processor_mapping(void)
-{
- /* Set persistent cpu <-> node mapping for all processors. */
- acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, set_processor_node_mapping,
- NULL, NULL, NULL);
-}
-#else
-void __init acpi_set_processor_mapping(void) {}
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
u64 *phys_addr, int *ioapic_id)
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index 85d833289f28..4c96f3ac4976 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -177,7 +177,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
case AHCI_LS1043A:
if (!qpriv->ecc_addr)
return -EINVAL;
- writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+ writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+ qpriv->ecc_addr);
writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
if (qpriv->is_dmacoherent)
@@ -194,7 +195,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
case AHCI_LS1046A:
if (!qpriv->ecc_addr)
return -EINVAL;
- writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+ writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+ qpriv->ecc_addr);
writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
if (qpriv->is_dmacoherent)
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 2bd92dca3e62..274d6d7193d7 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
break;
default:
- WARN_ON_ONCE(1);
return AC_ERR_SYSTEM;
}
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 46698232e6bf..19e6e539a061 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class,
static void ata_tport_release(struct device *dev)
{
- put_device(dev->parent);
}
/**
@@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent,
device_initialize(dev);
dev->type = &ata_port_type;
- dev->parent = get_device(parent);
+ dev->parent = parent;
dev->release = ata_tport_release;
dev_set_name(dev, "ata%d", ap->print_id);
transport_setup_device(dev);
@@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class,
static void ata_tlink_release(struct device *dev)
{
- put_device(dev->parent);
}
/**
@@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link)
int error;
device_initialize(dev);
- dev->parent = get_device(&ap->tdev);
+ dev->parent = &ap->tdev;
dev->release = ata_tlink_release;
if (ata_is_host_link(link))
dev_set_name(dev, "link%d", ap->print_id);
@@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class,
static void ata_tdev_release(struct device *dev)
{
- put_device(dev->parent);
}
/**
@@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev)
int error;
device_initialize(dev);
- dev->parent = get_device(&link->tdev);
+ dev->parent = &link->tdev;
dev->release = ata_tdev_release;
if (ata_is_host_link(link))
dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 684bda4d14a1..6bb60fb6a30b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void)
return restart_syscall();
}
-void assert_held_device_hotplug(void)
-{
- lockdep_assert_held(&device_hotplug_lock);
-}
-
#ifdef CONFIG_BLOCK
static inline int device_is_not_partition(struct device *dev)
{
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 3ad86fdf954e..b1ad12552b56 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -397,9 +397,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
irq, err);
return err;
}
- omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
- priv->clk = of_clk_get(pdev->dev.of_node, 0);
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
return -EPROBE_DEFER;
if (!IS_ERR(priv->clk)) {
@@ -408,6 +407,19 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
dev_err(&pdev->dev, "unable to enable the clk, "
"err = %d\n", err);
}
+
+ /*
+ * On OMAP4, enabling the shutdown_oflo interrupt is
+ * done in the interrupt mask register. There is no
+ * such register on EIP76, and it's enabled by the
+ * same bit in the control register
+ */
+ if (priv->pdata->regs[RNG_INTMASK_REG])
+ omap_rng_write(priv, RNG_INTMASK_REG,
+ RNG_SHUTDOWN_OFLO_MASK);
+ else
+ omap_rng_write(priv, RNG_CONTROL_REG,
+ RNG_SHUTDOWN_OFLO_MASK);
}
return 0;
}
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1ef26403bcc8..0ab024918907 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -313,13 +313,6 @@ static int random_read_wakeup_bits = 64;
static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
/*
- * The minimum number of seconds between urandom pool reseeding. We
- * do this to limit the amount of entropy that can be drained from the
- * input pool even if there are heavy demands on /dev/urandom.
- */
-static int random_min_urandom_seed = 60;
-
-/*
* Originally, we used a primitive polynomial of degree .poolwords
* over GF(2). The taps for various sizes are defined below. They
* were chosen to be evenly spaced except for the last tap, which is 1
@@ -409,7 +402,6 @@ static struct poolinfo {
*/
static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
static struct fasync_struct *fasync;
static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -467,7 +459,6 @@ struct entropy_store {
int entropy_count;
int entropy_total;
unsigned int initialized:1;
- unsigned int limit:1;
unsigned int last_data_init:1;
__u8 last_data[EXTRACT_SIZE];
};
@@ -485,7 +476,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
static struct entropy_store input_pool = {
.poolinfo = &poolinfo_table[0],
.name = "input",
- .limit = 1,
.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
.pool = input_pool_data
};
@@ -493,7 +483,6 @@ static struct entropy_store input_pool = {
static struct entropy_store blocking_pool = {
.poolinfo = &poolinfo_table[1],
.name = "blocking",
- .limit = 1,
.pull = &input_pool,
.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
.pool = blocking_pool_data,
@@ -855,13 +844,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
spin_unlock_irqrestore(&primary_crng.lock, flags);
}
-static inline void maybe_reseed_primary_crng(void)
-{
- if (crng_init > 2 &&
- time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
- crng_reseed(&primary_crng, &input_pool);
-}
-
static inline void crng_wait_ready(void)
{
wait_event_interruptible(crng_init_wait, crng_ready());
@@ -1220,15 +1202,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
r->entropy_count > r->poolinfo->poolfracbits)
return;
- if (r->limit == 0 && random_min_urandom_seed) {
- unsigned long now = jiffies;
-
- if (time_before(now,
- r->last_pulled + random_min_urandom_seed * HZ))
- return;
- r->last_pulled = now;
- }
-
_xfer_secondary_pool(r, nbytes);
}
@@ -1236,8 +1209,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
{
__u32 tmp[OUTPUT_POOL_WORDS];
- /* For /dev/random's pool, always leave two wakeups' worth */
- int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4;
int bytes = nbytes;
/* pull at least as much as a wakeup */
@@ -1248,7 +1219,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8,
ENTROPY_BITS(r), ENTROPY_BITS(r->pull));
bytes = extract_entropy(r->pull, tmp, bytes,
- random_read_wakeup_bits / 8, rsvd_bytes);
+ random_read_wakeup_bits / 8, 0);
mix_pool_bytes(r, tmp, bytes);
credit_entropy_bits(r, bytes*8);
}
@@ -1276,7 +1247,7 @@ static void push_to_pool(struct work_struct *work)
static size_t account(struct entropy_store *r, size_t nbytes, int min,
int reserved)
{
- int entropy_count, orig;
+ int entropy_count, orig, have_bytes;
size_t ibytes, nfrac;
BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
@@ -1285,14 +1256,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
ibytes = nbytes;
- /* If limited, never pull more than available */
- if (r->limit) {
- int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
+ /* never pull more than available */
+ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
- if ((have_bytes -= reserved) < 0)
- have_bytes = 0;
- ibytes = min_t(size_t, ibytes, have_bytes);
- }
+ if ((have_bytes -= reserved) < 0)
+ have_bytes = 0;
+ ibytes = min_t(size_t, ibytes, have_bytes);
if (ibytes < min)
ibytes = 0;
@@ -1912,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
static int min_read_thresh = 8, min_write_thresh;
static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static int random_min_urandom_seed = 60;
static char sysctl_bootid[16];
/*
@@ -2042,63 +2012,64 @@ struct ctl_table random_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
-
-int random_int_secret_init(void)
-{
- get_random_bytes(random_int_secret, sizeof(random_int_secret));
- return 0;
-}
-
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
- __aligned(sizeof(unsigned long));
+struct batched_entropy {
+ union {
+ u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
+ u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+ };
+ unsigned int position;
+};
/*
- * Get a random word for internal kernel use only. Similar to urandom but
- * with the goal of minimal entropy pool depletion. As a result, the random
- * value is not cryptographically secure but for several uses the cost of
- * depleting entropy is too high
+ * Get a random word for internal kernel use only. The quality of the random
+ * number is either as good as RDRAND or as good as /dev/urandom, with the
+ * goal of being quite fast and not depleting entropy.
*/
-unsigned int get_random_int(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
+u64 get_random_u64(void)
{
- __u32 *hash;
- unsigned int ret;
+ u64 ret;
+ struct batched_entropy *batch;
- if (arch_get_random_int(&ret))
+#if BITS_PER_LONG == 64
+ if (arch_get_random_long((unsigned long *)&ret))
return ret;
+#else
+ if (arch_get_random_long((unsigned long *)&ret) &&
+ arch_get_random_long((unsigned long *)&ret + 1))
+ return ret;
+#endif
- hash = get_cpu_var(get_random_int_hash);
-
- hash[0] += current->pid + jiffies + random_get_entropy();
- md5_transform(hash, random_int_secret);
- ret = hash[0];
- put_cpu_var(get_random_int_hash);
-
+ batch = &get_cpu_var(batched_entropy_u64);
+ if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+ extract_crng((u8 *)batch->entropy_u64);
+ batch->position = 0;
+ }
+ ret = batch->entropy_u64[batch->position++];
+ put_cpu_var(batched_entropy_u64);
return ret;
}
-EXPORT_SYMBOL(get_random_int);
+EXPORT_SYMBOL(get_random_u64);
-/*
- * Same as get_random_int(), but returns unsigned long.
- */
-unsigned long get_random_long(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
+u32 get_random_u32(void)
{
- __u32 *hash;
- unsigned long ret;
+ u32 ret;
+ struct batched_entropy *batch;
- if (arch_get_random_long(&ret))
+ if (arch_get_random_int(&ret))
return ret;
- hash = get_cpu_var(get_random_int_hash);
-
- hash[0] += current->pid + jiffies + random_get_entropy();
- md5_transform(hash, random_int_secret);
- ret = *(unsigned long *)hash;
- put_cpu_var(get_random_int_hash);
-
+ batch = &get_cpu_var(batched_entropy_u32);
+ if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+ extract_crng((u8 *)batch->entropy_u32);
+ batch->position = 0;
+ }
+ ret = batch->entropy_u32[batch->position++];
+ put_cpu_var(batched_entropy_u32);
return ret;
}
-EXPORT_SYMBOL(get_random_long);
+EXPORT_SYMBOL(get_random_u32);
/**
* randomize_page - Generate a random, page aligned address
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index 745844ee973e..d4ca9962a759 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -10,7 +10,6 @@
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/atmel_tc.h>
-#include <linux/sched_clock.h>
/*
@@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs)
return (upper << 16) | lower;
}
-static u32 tc_get_cv32(void)
-{
- return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
-}
-
static u64 tc_get_cycles32(struct clocksource *cs)
{
- return tc_get_cv32();
+ return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
}
static struct clocksource clksrc = {
@@ -75,11 +69,6 @@ static struct clocksource clksrc = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static u64 notrace tc_read_sched_clock(void)
-{
- return tc_get_cv32();
-}
-
#ifdef CONFIG_GENERIC_CLOCKEVENTS
struct tc_clkevt_device {
@@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void)
clksrc.read = tc_get_cycles32;
/* setup only channel 0 */
tcb_setup_single_chan(tc, best_divisor_idx);
-
- /* register sched_clock on chips with single 32 bit counter */
- sched_clock_register(tc_read_sched_clock, 32, divided_rate);
} else {
/* tclib will give us three clocks no matter what the
* underlying platform supports.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 38b9fdf854a4..b8ff617d449d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
char *buf)
{
unsigned int cur_freq = __cpufreq_get(policy);
- if (!cur_freq)
- return sprintf(buf, "<unknown>");
- return sprintf(buf, "%u\n", cur_freq);
+
+ if (cur_freq)
+ return sprintf(buf, "%u\n", cur_freq);
+
+ return sprintf(buf, "<unknown>\n");
}
/**
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3d37219a0dd7..08e134ffba68 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y)
return div64_u64(x << EXT_FRAC_BITS, y);
}
+static inline int32_t percent_ext_fp(int percent)
+{
+ return div_ext_fp(percent, 100);
+}
+
/**
* struct sample - Store performance sample
* @core_avg_perf: Ratio of APERF/MPERF which is the actual average
@@ -845,12 +850,11 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
{
- int min, hw_min, max, hw_max, cpu, range, adj_range;
+ int min, hw_min, max, hw_max, cpu;
struct perf_limits *perf_limits = limits;
u64 value, cap;
for_each_cpu(cpu, policy->cpus) {
- int max_perf_pct, min_perf_pct;
struct cpudata *cpu_data = all_cpu_data[cpu];
s16 epp;
@@ -863,20 +867,15 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
hw_max = HWP_GUARANTEED_PERF(cap);
else
hw_max = HWP_HIGHEST_PERF(cap);
- range = hw_max - hw_min;
- max_perf_pct = perf_limits->max_perf_pct;
- min_perf_pct = perf_limits->min_perf_pct;
+ min = fp_ext_toint(hw_max * perf_limits->min_perf);
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
- adj_range = min_perf_pct * range / 100;
- min = hw_min + adj_range;
+
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
- adj_range = max_perf_pct * range / 100;
- max = hw_min + adj_range;
-
+ max = fp_ext_toint(hw_max * perf_limits->max_perf);
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
@@ -989,6 +988,7 @@ static void intel_pstate_update_policies(void)
static int pid_param_set(void *data, u64 val)
{
*(u32 *)data = val;
+ pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
intel_pstate_reset_all_pid();
return 0;
}
@@ -1225,7 +1225,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
limits->max_perf_pct);
limits->max_perf_pct = max(limits->min_perf_pct,
limits->max_perf_pct);
- limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+ limits->max_perf = percent_ext_fp(limits->max_perf_pct);
intel_pstate_update_policies();
@@ -1262,7 +1262,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
limits->min_perf_pct);
limits->min_perf_pct = min(limits->max_perf_pct,
limits->min_perf_pct);
- limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+ limits->min_perf = percent_ext_fp(limits->min_perf_pct);
intel_pstate_update_policies();
@@ -2080,36 +2080,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
struct perf_limits *limits)
{
+ int32_t max_policy_perf, min_policy_perf;
- limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
- policy->cpuinfo.max_freq);
- limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+ max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+ max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
if (policy->max == policy->min) {
- limits->min_policy_pct = limits->max_policy_pct;
+ min_policy_perf = max_policy_perf;
} else {
- limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
- policy->cpuinfo.max_freq);
- limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
- 0, 100);
+ min_policy_perf = div_ext_fp(policy->min,
+ policy->cpuinfo.max_freq);
+ min_policy_perf = clamp_t(int32_t, min_policy_perf,
+ 0, max_policy_perf);
}
- /* Normalize user input to [min_policy_pct, max_policy_pct] */
- limits->min_perf_pct = max(limits->min_policy_pct,
- limits->min_sysfs_pct);
- limits->min_perf_pct = min(limits->max_policy_pct,
- limits->min_perf_pct);
- limits->max_perf_pct = min(limits->max_policy_pct,
- limits->max_sysfs_pct);
- limits->max_perf_pct = max(limits->min_policy_pct,
- limits->max_perf_pct);
+ /* Normalize user input to [min_perf, max_perf] */
+ limits->min_perf = max(min_policy_perf,
+ percent_ext_fp(limits->min_sysfs_pct));
+ limits->min_perf = min(limits->min_perf, max_policy_perf);
+ limits->max_perf = min(max_policy_perf,
+ percent_ext_fp(limits->max_sysfs_pct));
+ limits->max_perf = max(min_policy_perf, limits->max_perf);
- /* Make sure min_perf_pct <= max_perf_pct */
- limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+ /* Make sure min_perf <= max_perf */
+ limits->min_perf = min(limits->min_perf, limits->max_perf);
- limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
- limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+ limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
+ limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
limits->max_perf_pct, limits->min_perf_pct);
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index dce1af0ce85c..1b9da3dc799b 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg,
scatterwalk_done(&walk, out, 0);
}
-static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+static void s5p_sg_done(struct s5p_aes_dev *dev)
{
if (dev->sg_dst_cpy) {
dev_dbg(dev->dev,
@@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
}
s5p_free_sg_cpy(dev, &dev->sg_src_cpy);
s5p_free_sg_cpy(dev, &dev->sg_dst_cpy);
+}
- /* holding a lock outside */
+/* Calls the completion. Cannot be called with dev->lock held. */
+static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+{
dev->req->base.complete(&dev->req->base, err);
dev->busy = false;
}
@@ -368,51 +371,44 @@ exit:
}
/*
- * Returns true if new transmitting (output) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_outdata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ * - 0 if there is no more data,
+ * - 1 if new transmitting (output) data is ready and its address+length
+ * have to be written to device (by calling s5p_set_dma_outdata()).
*/
-static bool s5p_aes_tx(struct s5p_aes_dev *dev)
+static int s5p_aes_tx(struct s5p_aes_dev *dev)
{
- int err = 0;
- bool ret = false;
+ int ret = 0;
s5p_unset_outdata(dev);
if (!sg_is_last(dev->sg_dst)) {
- err = s5p_set_outdata(dev, sg_next(dev->sg_dst));
- if (err)
- s5p_aes_complete(dev, err);
- else
- ret = true;
- } else {
- s5p_aes_complete(dev, err);
-
- dev->busy = true;
- tasklet_schedule(&dev->tasklet);
+ ret = s5p_set_outdata(dev, sg_next(dev->sg_dst));
+ if (!ret)
+ ret = 1;
}
return ret;
}
/*
- * Returns true if new receiving (input) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_indata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ * - 0 if there is no more data,
+ * - 1 if new receiving (input) data is ready and its address+length
+ * have to be written to device (by calling s5p_set_dma_indata()).
*/
-static bool s5p_aes_rx(struct s5p_aes_dev *dev)
+static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/)
{
- int err;
- bool ret = false;
+ int ret = 0;
s5p_unset_indata(dev);
if (!sg_is_last(dev->sg_src)) {
- err = s5p_set_indata(dev, sg_next(dev->sg_src));
- if (err)
- s5p_aes_complete(dev, err);
- else
- ret = true;
+ ret = s5p_set_indata(dev, sg_next(dev->sg_src));
+ if (!ret)
+ ret = 1;
}
return ret;
@@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
{
struct platform_device *pdev = dev_id;
struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
- bool set_dma_tx = false;
- bool set_dma_rx = false;
+ int err_dma_tx = 0;
+ int err_dma_rx = 0;
+ bool tx_end = false;
unsigned long flags;
uint32_t status;
+ int err;
spin_lock_irqsave(&dev->lock, flags);
+ /*
+ * Handle rx or tx interrupt. If there is still data (scatterlist did not
+ * reach end), then map next scatterlist entry.
+ * In case of such mapping error, s5p_aes_complete() should be called.
+ *
+ * If there is no more data in tx scatter list, call s5p_aes_complete()
+ * and schedule new tasklet.
+ */
status = SSS_READ(dev, FCINTSTAT);
if (status & SSS_FCINTSTAT_BRDMAINT)
- set_dma_rx = s5p_aes_rx(dev);
- if (status & SSS_FCINTSTAT_BTDMAINT)
- set_dma_tx = s5p_aes_tx(dev);
+ err_dma_rx = s5p_aes_rx(dev);
+
+ if (status & SSS_FCINTSTAT_BTDMAINT) {
+ if (sg_is_last(dev->sg_dst))
+ tx_end = true;
+ err_dma_tx = s5p_aes_tx(dev);
+ }
SSS_WRITE(dev, FCINTPEND, status);
- /*
- * Writing length of DMA block (either receiving or transmitting)
- * will start the operation immediately, so this should be done
- * at the end (even after clearing pending interrupts to not miss the
- * interrupt).
- */
- if (set_dma_tx)
- s5p_set_dma_outdata(dev, dev->sg_dst);
- if (set_dma_rx)
- s5p_set_dma_indata(dev, dev->sg_src);
+ if (err_dma_rx < 0) {
+ err = err_dma_rx;
+ goto error;
+ }
+ if (err_dma_tx < 0) {
+ err = err_dma_tx;
+ goto error;
+ }
+
+ if (tx_end) {
+ s5p_sg_done(dev);
+
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ s5p_aes_complete(dev, 0);
+ dev->busy = true;
+ tasklet_schedule(&dev->tasklet);
+ } else {
+ /*
+ * Writing length of DMA block (either receiving or
+ * transmitting) will start the operation immediately, so this
+ * should be done at the end (even after clearing pending
+ * interrupts to not miss the interrupt).
+ */
+ if (err_dma_tx == 1)
+ s5p_set_dma_outdata(dev, dev->sg_dst);
+ if (err_dma_rx == 1)
+ s5p_set_dma_indata(dev, dev->sg_src);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ }
+
+ return IRQ_HANDLED;
+
+error:
+ s5p_sg_done(dev);
spin_unlock_irqrestore(&dev->lock, flags);
+ s5p_aes_complete(dev, err);
return IRQ_HANDLED;
}
@@ -597,8 +633,9 @@ outdata_error:
s5p_unset_indata(dev);
indata_error:
- s5p_aes_complete(dev, err);
+ s5p_sg_done(dev);
spin_unlock_irqrestore(&dev->lock, flags);
+ s5p_aes_complete(dev, err);
}
static void s5p_tasklet_cb(unsigned long data)
@@ -805,8 +842,9 @@ static int s5p_aes_probe(struct platform_device *pdev)
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
}
- err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
- IRQF_SHARED, pdev->name, pdev);
+ err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+ s5p_aes_interrupt, IRQF_ONESHOT,
+ pdev->name, pdev);
if (err < 0) {
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 8d9829ff2a78..80c6db279ae1 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -427,6 +427,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
int rc = VM_FAULT_SIGBUS;
phys_addr_t phys;
pfn_t pfn;
+ unsigned int fault_size = PAGE_SIZE;
if (check_vma(dax_dev, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
@@ -437,9 +438,12 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
+ if (fault_size != dax_region->align)
+ return VM_FAULT_SIGBUS;
+
phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
if (phys == -1) {
- dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+ dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
vmf->pgoff);
return VM_FAULT_SIGBUS;
}
@@ -464,6 +468,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
phys_addr_t phys;
pgoff_t pgoff;
pfn_t pfn;
+ unsigned int fault_size = PMD_SIZE;
if (check_vma(dax_dev, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
@@ -480,10 +485,20 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
+ if (fault_size < dax_region->align)
+ return VM_FAULT_SIGBUS;
+ else if (fault_size > dax_region->align)
+ return VM_FAULT_FALLBACK;
+
+ /* if we are outside of the VMA */
+ if (pmd_addr < vmf->vma->vm_start ||
+ (pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
+ return VM_FAULT_SIGBUS;
+
pgoff = linear_page_index(vmf->vma, pmd_addr);
phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
if (phys == -1) {
- dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+ dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
pgoff);
return VM_FAULT_SIGBUS;
}
@@ -503,6 +518,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
phys_addr_t phys;
pgoff_t pgoff;
pfn_t pfn;
+ unsigned int fault_size = PUD_SIZE;
+
if (check_vma(dax_dev, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
@@ -519,10 +536,20 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
+ if (fault_size < dax_region->align)
+ return VM_FAULT_SIGBUS;
+ else if (fault_size > dax_region->align)
+ return VM_FAULT_FALLBACK;
+
+ /* if we are outside of the VMA */
+ if (pud_addr < vmf->vma->vm_start ||
+ (pud_addr + PUD_SIZE) > vmf->vma->vm_end)
+ return VM_FAULT_SIGBUS;
+
pgoff = linear_page_index(vmf->vma, pud_addr);
phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
if (phys == -1) {
- dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+ dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
pgoff);
return VM_FAULT_SIGBUS;
}
diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c
index 9e1a138fed53..16a8951b2bed 100644
--- a/drivers/gpio/gpio-altera-a10sr.c
+++ b/drivers/gpio/gpio-altera-a10sr.c
@@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev)
gpio->regmap = a10sr->regmap;
gpio->gp = altr_a10sr_gc;
-
+ gpio->gp.parent = pdev->dev.parent;
gpio->gp.of_node = pdev->dev.of_node;
ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio);
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 5bddbd507ca9..3fe6a21e05a5 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
- if (type == IRQ_TYPE_NONE)
+ if (type == IRQ_TYPE_NONE) {
+ irq_set_handler_locked(d, handle_bad_irq);
return 0;
- if (type == IRQ_TYPE_LEVEL_HIGH &&
- altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
- return 0;
- if (type == IRQ_TYPE_EDGE_RISING &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING)
- return 0;
- if (type == IRQ_TYPE_EDGE_FALLING &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
- return 0;
- if (type == IRQ_TYPE_EDGE_BOTH &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
+ }
+ if (type == altera_gc->interrupt_trigger) {
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ irq_set_handler_locked(d, handle_level_irq);
+ else
+ irq_set_handler_locked(d, handle_simple_irq);
return 0;
-
+ }
+ irq_set_handler_locked(d, handle_bad_irq);
return -EINVAL;
}
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-
static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
{
struct altera_gpio_chip *altera_gc;
@@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
altera_gc->interrupt_trigger = reg;
ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
- handle_simple_irq, IRQ_TYPE_NONE);
+ handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(&pdev->dev, "could not add irqchip\n");
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index bdb692345428..2a57d024481d 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value)
static irqreturn_t mcp23s08_irq(int irq, void *data)
{
struct mcp23s08 *mcp = data;
- int intcap, intf, i;
+ int intcap, intf, i, gpio, gpio_orig, intcap_mask;
unsigned int child_irq;
+ bool intf_set, intcap_changed, gpio_bit_changed,
+ defval_changed, gpio_set;
mutex_lock(&mcp->lock);
if (mcp_read(mcp, MCP_INTF, &intf) < 0) {
@@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data)
}
mcp->cache[MCP_INTCAP] = intcap;
+
+ /* This clears the interrupt (configurable on S18) */
+ if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) {
+ mutex_unlock(&mcp->lock);
+ return IRQ_HANDLED;
+ }
+ gpio_orig = mcp->cache[MCP_GPIO];
+ mcp->cache[MCP_GPIO] = gpio;
mutex_unlock(&mcp->lock);
+ if (mcp->cache[MCP_INTF] == 0) {
+ /* There is no interrupt pending */
+ return IRQ_HANDLED;
+ }
+
+ dev_dbg(mcp->chip.parent,
+ "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n",
+ intcap, intf, gpio_orig, gpio);
for (i = 0; i < mcp->chip.ngpio; i++) {
- if ((BIT(i) & mcp->cache[MCP_INTF]) &&
- ((BIT(i) & intcap & mcp->irq_rise) ||
- (mcp->irq_fall & ~intcap & BIT(i)) ||
- (BIT(i) & mcp->cache[MCP_INTCON]))) {
+ /* We must check all of the inputs on the chip,
+ * otherwise we may not notice a change on >=2 pins.
+ *
+ * On at least the mcp23s17, INTCAP is only updated
+ * one byte at a time(INTCAPA and INTCAPB are
+ * not written to at the same time - only on a per-bank
+ * basis).
+ *
+ * INTF only contains the single bit that caused the
+ * interrupt per-bank. On the mcp23s17, there is
+ * INTFA and INTFB. If two pins are changed on the A
+ * side at the same time, INTF will only have one bit
+ * set. If one pin on the A side and one pin on the B
+ * side are changed at the same time, INTF will have
+ * two bits set. Thus, INTF can't be the only check
+ * to see if the input has changed.
+ */
+
+ intf_set = BIT(i) & mcp->cache[MCP_INTF];
+ if (i < 8 && intf_set)
+ intcap_mask = 0x00FF;
+ else if (i >= 8 && intf_set)
+ intcap_mask = 0xFF00;
+ else
+ intcap_mask = 0x00;
+
+ intcap_changed = (intcap_mask &
+ (BIT(i) & mcp->cache[MCP_INTCAP])) !=
+ (intcap_mask & (BIT(i) & gpio_orig));
+ gpio_set = BIT(i) & mcp->cache[MCP_GPIO];
+ gpio_bit_changed = (BIT(i) & gpio_orig) !=
+ (BIT(i) & mcp->cache[MCP_GPIO]);
+ defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) &&
+ ((BIT(i) & mcp->cache[MCP_GPIO]) !=
+ (BIT(i) & mcp->cache[MCP_DEFVAL]));
+
+ if (((gpio_bit_changed || intcap_changed) &&
+ (BIT(i) & mcp->irq_rise) && gpio_set) ||
+ ((gpio_bit_changed || intcap_changed) &&
+ (BIT(i) & mcp->irq_fall) && !gpio_set) ||
+ defval_changed) {
child_irq = irq_find_mapping(mcp->chip.irqdomain, i);
handle_nested_irq(child_irq);
}
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 06dac72cb69c..d99338689213 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file,
struct seq_file *sfile;
struct gpio_desc *desc;
struct gpio_chip *gc;
- int status, val;
+ int val;
char buf;
sfile = file->private_data;
@@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file,
chip = priv->chip;
gc = &chip->gc;
- status = copy_from_user(&buf, usr_buf, 1);
- if (status)
- return status;
+ if (copy_from_user(&buf, usr_buf, 1))
+ return -EFAULT;
if (buf == '0')
val = 0;
diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c
index 40a8881c2ce8..f1c6ec17b90a 100644
--- a/drivers/gpio/gpio-xgene.c
+++ b/drivers/gpio/gpio-xgene.c
@@ -42,9 +42,7 @@ struct xgene_gpio {
struct gpio_chip chip;
void __iomem *base;
spinlock_t lock;
-#ifdef CONFIG_PM
u32 set_dr_val[XGENE_MAX_GPIO_BANKS];
-#endif
};
static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset)
@@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc,
return 0;
}
-#ifdef CONFIG_PM
-static int xgene_gpio_suspend(struct device *dev)
+static __maybe_unused int xgene_gpio_suspend(struct device *dev)
{
struct xgene_gpio *gpio = dev_get_drvdata(dev);
unsigned long bank_offset;
@@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev)
return 0;
}
-static int xgene_gpio_resume(struct device *dev)
+static __maybe_unused int xgene_gpio_resume(struct device *dev)
{
struct xgene_gpio *gpio = dev_get_drvdata(dev);
unsigned long bank_offset;
@@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev)
}
static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume);
-#define XGENE_GPIO_PM_OPS (&xgene_gpio_pm)
-#else
-#define XGENE_GPIO_PM_OPS NULL
-#endif
static int xgene_gpio_probe(struct platform_device *pdev)
{
@@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = {
.name = "xgene-gpio",
.of_match_table = xgene_gpio_of_match,
.acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match),
- .pm = XGENE_GPIO_PM_OPS,
+ .pm = &xgene_gpio_pm,
},
.probe = xgene_gpio_probe,
};
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
index 8363cb57915b..8a08e81ee90d 100644
--- a/drivers/gpu/drm/amd/acp/Makefile
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -3,6 +3,4 @@
# of AMDSOC/AMDGPU drm driver.
# It provides the HW control for ACP related functionalities.
-subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
-
AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d2d0f60ff36d..99424cb8020b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -240,6 +240,8 @@ free_partial_kdata:
for (; i >= 0; i--)
drm_free_large(p->chunks[i].kdata);
kfree(p->chunks);
+ p->chunks = NULL;
+ p->nchunks = 0;
put_ctx:
amdgpu_ctx_put(p->ctx);
free_chunk:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4120b351a8e5..a3a105ec99e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
use_bank = 0;
}
- *pos &= 0x3FFFF;
+ *pos &= (1UL << 22) - 1;
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
@@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
use_bank = 0;
}
- *pos &= 0x3FFFF;
+ *pos &= (1UL << 22) - 1;
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index f55e45b52fbc..33b504bafb88 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
(adev->pdev->device == 0x6667)) {
max_sclk = 75000;
}
+ } else if (adev->asic_type == CHIP_OLAND) {
+ if ((adev->pdev->device == 0x6604) &&
+ (adev->pdev->subsystem_vendor == 0x1028) &&
+ (adev->pdev->subsystem_device == 0x066F)) {
+ max_sclk = 75000;
+ }
}
if (rps->vce_active) {
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 50bdb24ef8d6..4a785d6acfb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle)
/* rev0 hardware requires workarounds to support PG */
adev->pg_flags = 0;
if (adev->rev_id != 0x00) {
- adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ adev->pg_flags |=
AMD_PG_SUPPORT_GFX_SMG |
AMD_PG_SUPPORT_GFX_PIPELINE |
AMD_PG_SUPPORT_CP |
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index 8cf71f3c6d0e..261b828ad590 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
if (bgate) {
cgs_set_powergating_state(hwmgr->device,
AMD_IP_BLOCK_TYPE_VCE,
- AMD_PG_STATE_UNGATE);
+ AMD_PG_STATE_GATE);
cgs_set_clockgating_state(hwmgr->device,
AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_GATE);
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index 08e6a71f5d05..294b53697334 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc)
clk_prepare_enable(hwdev->pxlclk);
- /* mclk needs to be set to the same or higher rate than pxlclk */
- clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000);
+ /* We rely on firmware to set mclk to a sensible level. */
clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000);
hwdev->modeset(hwdev, &vm);
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index 488aedf5b58d..9f5513006eee 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = {
{ DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
{ DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE },
{ DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
- { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 },
+ { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE },
};
#define MALIDP_DE_DEFAULT_PREFETCH_START 5
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 414aada10fe5..d5aec082294c 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -37,6 +37,8 @@
#define LAYER_V_VAL(x) (((x) & 0x1fff) << 16)
#define MALIDP_LAYER_COMP_SIZE 0x010
#define MALIDP_LAYER_OFFSET 0x014
+#define MALIDP550_LS_ENABLE 0x01c
+#define MALIDP550_LS_R1_IN_SIZE 0x020
/*
* This 4-entry look-up-table is used to determine the full 8-bit alpha value
@@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane,
LAYER_V_VAL(plane->state->crtc_y),
mp->layer->base + MALIDP_LAYER_OFFSET);
+ if (mp->layer->id == DE_SMART)
+ malidp_hw_write(mp->hwdev,
+ LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h),
+ mp->layer->base + MALIDP550_LS_R1_IN_SIZE);
+
/* first clear the rotation bits */
val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL);
val &= ~LAYER_ROT_MASK;
@@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm)
plane->hwdev = malidp->dev;
plane->layer = &map->layers[i];
- /* Skip the features which the SMART layer doesn't have */
- if (id == DE_SMART)
+ if (id == DE_SMART) {
+ /*
+ * Enable the first rectangle in the SMART layer to be
+ * able to use it as a drm plane.
+ */
+ malidp_hw_write(malidp->dev, 1,
+ plane->layer->base + MALIDP550_LS_ENABLE);
+ /* Skip the features which the SMART layer doesn't have. */
continue;
+ }
drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags);
malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT,
diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h
index aff6d4a84e99..b816067a65c5 100644
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -84,6 +84,7 @@
/* Stride register offsets relative to Lx_BASE */
#define MALIDP_DE_LG_STRIDE 0x18
#define MALIDP_DE_LV_STRIDE0 0x18
+#define MALIDP550_DE_LS_R1_STRIDE 0x28
/* macros to set values into registers */
#define MALIDP_DE_H_FRONTPORCH(x) (((x) & 0xfff) << 0)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a4b42d31391..7febe6eecf72 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -293,6 +293,7 @@ enum plane_id {
PLANE_PRIMARY,
PLANE_SPRITE0,
PLANE_SPRITE1,
+ PLANE_SPRITE2,
PLANE_CURSOR,
I915_MAX_PLANES,
};
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6908123162d1..10777da73039 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+ ret = -ENODEV;
+ if (obj->ops->pwrite)
+ ret = obj->ops->pwrite(obj, args);
+ if (ret != -ENODEV)
+ goto err;
+
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
@@ -2119,6 +2125,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
*/
shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
obj->mm.madv = __I915_MADV_PURGED;
+ obj->mm.pages = ERR_PTR(-EFAULT);
}
/* Try to discard unwanted pages */
@@ -2218,7 +2225,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
__i915_gem_object_reset_page_iter(obj);
- obj->ops->put_pages(obj, pages);
+ if (!IS_ERR(pages))
+ obj->ops->put_pages(obj, pages);
+
unlock:
mutex_unlock(&obj->mm.lock);
}
@@ -2437,7 +2446,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
if (err)
return err;
- if (unlikely(!obj->mm.pages)) {
+ if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
err = ____i915_gem_object_get_pages(obj);
if (err)
goto unlock;
@@ -2515,7 +2524,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
pinned = true;
if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
- if (unlikely(!obj->mm.pages)) {
+ if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
ret = ____i915_gem_object_get_pages(obj);
if (ret)
goto err_unlock;
@@ -2563,6 +2572,75 @@ err_unlock:
goto out_unlock;
}
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *arg)
+{
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+ u64 remain, offset;
+ unsigned int pg;
+
+ /* Before we instantiate/pin the backing store for our use, we
+ * can prepopulate the shmemfs filp efficiently using a write into
+ * the pagecache. We avoid the penalty of instantiating all the
+ * pages, important if the user is just writing to a few and never
+ * uses the object on the GPU, and using a direct write into shmemfs
+ * allows it to avoid the cost of retrieving a page (either swapin
+ * or clearing-before-use) before it is overwritten.
+ */
+ if (READ_ONCE(obj->mm.pages))
+ return -ENODEV;
+
+ /* Before the pages are instantiated the object is treated as being
+ * in the CPU domain. The pages will be clflushed as required before
+ * use, and we can freely write into the pages directly. If userspace
+ * races pwrite with any other operation, corruption will ensue -
+ * that is userspace's prerogative!
+ */
+
+ remain = arg->size;
+ offset = arg->offset;
+ pg = offset_in_page(offset);
+
+ do {
+ unsigned int len, unwritten;
+ struct page *page;
+ void *data, *vaddr;
+ int err;
+
+ len = PAGE_SIZE - pg;
+ if (len > remain)
+ len = remain;
+
+ err = pagecache_write_begin(obj->base.filp, mapping,
+ offset, len, 0,
+ &page, &data);
+ if (err < 0)
+ return err;
+
+ vaddr = kmap(page);
+ unwritten = copy_from_user(vaddr + pg, user_data, len);
+ kunmap(page);
+
+ err = pagecache_write_end(obj->base.filp, mapping,
+ offset, len, len - unwritten,
+ page, data);
+ if (err < 0)
+ return err;
+
+ if (unwritten)
+ return -EFAULT;
+
+ remain -= len;
+ user_data += len;
+ offset += len;
+ pg = 0;
+ } while (remain);
+
+ return 0;
+}
+
static bool ban_context(const struct i915_gem_context *ctx)
{
return (i915_gem_context_is_bannable(ctx) &&
@@ -3029,6 +3107,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
if (args->timeout_ns < 0)
args->timeout_ns = 0;
+
+ /*
+ * Apparently ktime isn't accurate enough and occasionally has a
+ * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+ * things up to make the test happy. We allow up to 1 jiffy.
+ *
+ * This is a regression from the timespec->ktime conversion.
+ */
+ if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+ args->timeout_ns = 0;
}
i915_gem_object_put(obj);
@@ -3974,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
I915_GEM_OBJECT_IS_SHRINKABLE,
+
.get_pages = i915_gem_object_get_pages_gtt,
.put_pages = i915_gem_object_put_pages_gtt,
+
+ .pwrite = i915_gem_object_pwrite_gtt,
};
struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index c181b1bb3d2c..3be2503aa042 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* those as well to make room for our guard pages.
*/
if (check_color) {
- if (vma->node.start + vma->node.size == node->start) {
- if (vma->node.color == node->color)
+ if (node->start + node->size == target->start) {
+ if (node->color == target->color)
continue;
}
- if (vma->node.start == node->start + node->size) {
- if (vma->node.color == node->color)
+ if (node->start == target->start + target->size) {
+ if (node->color == target->color)
continue;
}
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index bf90b07163d1..76b80a0be797 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops {
struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
+ int (*pwrite)(struct drm_i915_gem_object *,
+ const struct drm_i915_gem_pwrite *);
+
int (*dmabuf_export)(struct drm_i915_gem_object *);
void (*release)(struct drm_i915_gem_object *);
};
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 155906e84812..df20e9bc1c0f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -512,10 +512,36 @@ err_unpin:
return ret;
}
+static void
+i915_vma_remove(struct i915_vma *vma)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+
+ drm_mm_remove_node(&vma->node);
+ list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
+ /* Since the unbound list is global, only move to that list if
+ * no more VMAs exist.
+ */
+ if (--obj->bind_count == 0)
+ list_move_tail(&obj->global_link,
+ &to_i915(obj->base.dev)->mm.unbound_list);
+
+ /* And finally now the object is completely decoupled from this vma,
+ * we can drop its hold on the backing storage and allow it to be
+ * reaped by the shrinker.
+ */
+ i915_gem_object_unpin_pages(obj);
+ GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+}
+
int __i915_vma_do_pin(struct i915_vma *vma,
u64 size, u64 alignment, u64 flags)
{
- unsigned int bound = vma->flags;
+ const unsigned int bound = vma->flags;
int ret;
lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma,
if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
ret = -EBUSY;
- goto err;
+ goto err_unpin;
}
if ((bound & I915_VMA_BIND_MASK) == 0) {
ret = i915_vma_insert(vma, size, alignment, flags);
if (ret)
- goto err;
+ goto err_unpin;
}
ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
if (ret)
- goto err;
+ goto err_remove;
if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
__i915_vma_set_map_and_fenceable(vma);
@@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma,
GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
return 0;
-err:
+err_remove:
+ if ((bound & I915_VMA_BIND_MASK) == 0) {
+ GEM_BUG_ON(vma->pages);
+ i915_vma_remove(vma);
+ }
+err_unpin:
__i915_vma_unpin(vma);
return ret;
}
@@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma)
}
vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
- drm_mm_remove_node(&vma->node);
- list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
-
if (vma->pages != obj->mm.pages) {
GEM_BUG_ON(!vma->pages);
sg_free_table(vma->pages);
@@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma)
}
vma->pages = NULL;
- /* Since the unbound list is global, only move to that list if
- * no more VMAs exist. */
- if (--obj->bind_count == 0)
- list_move_tail(&obj->global_link,
- &to_i915(obj->base.dev)->mm.unbound_list);
-
- /* And finally now the object is completely decoupled from this vma,
- * we can drop its hold on the backing storage and allow it to be
- * reaped by the shrinker.
- */
- i915_gem_object_unpin_pages(obj);
- GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+ i915_vma_remove(vma);
destroy:
if (unlikely(i915_vma_is_closed(vma)))
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 01341670738f..3282b0f4b134 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc,
/* drm_atomic_helper_update_legacy_modeset_state might not be called. */
crtc->base.mode = crtc->base.state->mode;
- DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n",
- old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h,
- pipe_config->pipe_src_w, pipe_config->pipe_src_h);
-
/*
* Update pipe size and adjust fitter if needed: the reason for this is
* that in compute_mode_changes we check the native mode (not the pfit
@@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc)
struct intel_crtc_scaler_state *scaler_state =
&crtc->config->scaler_state;
- DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config);
-
if (crtc->config->pch_pfit.enabled) {
int id;
- if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) {
- DRM_ERROR("Requesting pfit without getting a scaler first\n");
+ if (WARN_ON(crtc->config->scaler_state.scaler_id < 0))
return;
- }
id = scaler_state->scaler_id;
I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos);
I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size);
-
- DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id);
}
}
@@ -14379,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state,
} while (progress);
}
+static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
+{
+ struct intel_atomic_state *state, *next;
+ struct llist_node *freed;
+
+ freed = llist_del_all(&dev_priv->atomic_helper.free_list);
+ llist_for_each_entry_safe(state, next, freed, freed)
+ drm_atomic_state_put(&state->base);
+}
+
+static void intel_atomic_helper_free_state_worker(struct work_struct *work)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(work, typeof(*dev_priv), atomic_helper.free_work);
+
+ intel_atomic_helper_free_state(dev_priv);
+}
+
static void intel_atomic_commit_tail(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
@@ -14545,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
* can happen also when the device is completely off.
*/
intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+
+ intel_atomic_helper_free_state(dev_priv);
}
static void intel_atomic_commit_work(struct work_struct *work)
@@ -14946,17 +14956,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc,
to_intel_atomic_state(old_crtc_state->state);
bool modeset = needs_modeset(crtc->state);
+ if (!modeset &&
+ (intel_cstate->base.color_mgmt_changed ||
+ intel_cstate->update_pipe)) {
+ intel_color_set_csc(crtc->state);
+ intel_color_load_luts(crtc->state);
+ }
+
/* Perform vblank evasion around commit operation */
intel_pipe_update_start(intel_crtc);
if (modeset)
goto out;
- if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) {
- intel_color_set_csc(crtc->state);
- intel_color_load_luts(crtc->state);
- }
-
if (intel_cstate->update_pipe)
intel_update_pipe_config(intel_crtc, old_intel_cstate);
else if (INTEL_GEN(dev_priv) >= 9)
@@ -16599,18 +16611,6 @@ fail:
drm_modeset_acquire_fini(&ctx);
}
-static void intel_atomic_helper_free_state(struct work_struct *work)
-{
- struct drm_i915_private *dev_priv =
- container_of(work, typeof(*dev_priv), atomic_helper.free_work);
- struct intel_atomic_state *state, *next;
- struct llist_node *freed;
-
- freed = llist_del_all(&dev_priv->atomic_helper.free_list);
- llist_for_each_entry_safe(state, next, freed, freed)
- drm_atomic_state_put(&state->base);
-}
-
int intel_modeset_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
@@ -16631,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev)
dev->mode_config.funcs = &intel_mode_funcs;
INIT_WORK(&dev_priv->atomic_helper.free_work,
- intel_atomic_helper_free_state);
+ intel_atomic_helper_free_state_worker);
intel_init_quirks(dev);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 1b8ba2e77539..2d449fb5d1d2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
bool *enabled, int width, int height)
{
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
- unsigned long conn_configured, mask;
+ unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
int i, j;
bool *save_enabled;
bool fallback = true;
int num_connectors_enabled = 0;
int num_connectors_detected = 0;
- int pass = 0;
save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
if (!save_enabled)
@@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
mask = BIT(count) - 1;
conn_configured = 0;
retry:
+ conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
@@ -387,7 +387,7 @@ retry:
if (conn_configured & BIT(i))
continue;
- if (pass == 0 && !connector->has_tile)
+ if (conn_seq == 0 && !connector->has_tile)
continue;
if (connector->status == connector_status_connected)
@@ -498,10 +498,8 @@ retry:
conn_configured |= BIT(i);
}
- if ((conn_configured & mask) != mask) {
- pass++;
+ if ((conn_configured & mask) != mask && conn_configured != conn_seq)
goto retry;
- }
/*
* If the BIOS didn't enable everything it could, fall back to have the
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 249623d45be0..940bab22d464 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
break;
}
+ /* When byt can survive without system hang with dynamic
+ * sw freq adjustments, this restriction can be lifted.
+ */
+ if (IS_VALLEYVIEW(dev_priv))
+ goto skip_hw_write;
+
I915_WRITE(GEN6_RP_UP_EI,
GT_INTERVAL_FROM_US(dev_priv, ei_up));
I915_WRITE(GEN6_RP_UP_THRESHOLD,
@@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
+skip_hw_write:
dev_priv->rps.power = new_power;
dev_priv->rps.up_threshold = threshold_up;
dev_priv->rps.down_threshold = threshold_down;
@@ -7916,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
* @timeout_base_ms: timeout for polling with preemption enabled
*
* Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+10 ms expires.
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
* The request is acknowledged once the PCODE reply dword equals @reply after
* applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 10 ms with
+ * for @timeout_base_ms and if this times out for another 50 ms with
* preemption disabled.
*
* Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
@@ -7955,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
* worst case) _and_ PCODE was busy for some reason even after a
* (queued) request and @timeout_base_ms delay. As a workaround retry
* the poll with preemption disabled to maximize the number of
- * requests. Increase the timeout from @timeout_base_ms to 10ms to
+ * requests. Increase the timeout from @timeout_base_ms to 50ms to
* account for interrupts that could reduce the number of these
- * requests.
+ * requests, and for any quirks of the PCODE firmware that delay
+ * the request completion.
*/
DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
WARN_ON_ONCE(timeout_base_ms > 3);
preempt_disable();
- ret = wait_for_atomic(COND, 10);
+ ret = wait_for_atomic(COND, 50);
preempt_enable();
out:
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 9ef54688872a..9481ca9a3ae7 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane,
int scaler_id = plane_state->scaler_id;
const struct intel_scaler *scaler;
- DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n",
- plane_id, PS_PLANE_SEL(plane_id));
-
scaler = &crtc_state->scaler_state.scalers[scaler_id];
I915_WRITE(SKL_PS_CTRL(pipe, scaler_id),
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index abe08885a5ba..b7ff592b14f5 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
for_each_fw_domain_masked(d, fw_domains, dev_priv)
fw_domain_wait_ack(d);
+
+ dev_priv->uncore.fw_domains_active |= fw_domains;
}
static void
@@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
fw_domain_put(d);
fw_domain_posting_read(d);
}
+
+ dev_priv->uncore.fw_domains_active &= ~fw_domains;
}
static void
@@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
if (WARN_ON(domain->wake_count == 0))
domain->wake_count++;
- if (--domain->wake_count == 0) {
+ if (--domain->wake_count == 0)
dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask);
- dev_priv->uncore.fw_domains_active &= ~domain->mask;
- }
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
@@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
fw_domains &= ~domain->mask;
}
- if (fw_domains) {
+ if (fw_domains)
dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
- dev_priv->uncore.fw_domains_active |= fw_domains;
- }
}
/**
@@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv,
fw_domain_arm_timer(domain);
dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
- dev_priv->uncore.fw_domains_active |= fw_domains;
}
static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index af267c35d813..ee5883f59be5 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
struct drm_gem_object *obj = buffer->priv;
int ret = 0;
- if (WARN_ON(!obj->filp))
- return -EINVAL;
-
ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index d12b8978142f..72e1588580a1 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
(rdev->pdev->device == 0x6667)) {
max_sclk = 75000;
}
+ } else if (rdev->family == CHIP_OLAND) {
+ if ((rdev->pdev->device == 0x6604) &&
+ (rdev->pdev->subsystem_vendor == 0x1028) &&
+ (rdev->pdev->subsystem_device == 0x066F)) {
+ max_sclk = 75000;
+ }
}
if (rps->vce_active) {
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index f80bf9385e41..d745e8b50fb8 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
+ unsigned long flags;
WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
mutex_lock(&tilcdc_crtc->enable_lock);
@@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG,
LCDC_PALETTE_LOAD_MODE(DATA_ONLY),
LCDC_PALETTE_LOAD_MODE_MASK);
+
+ /* There is no real chance for a race here as the time stamp
+ * is taken before the raster DMA is started. The spin-lock is
+ * taken to have a memory barrier after taking the time-stamp
+ * and to avoid a context switch between taking the stamp and
+ * enabling the raster.
+ */
+ spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+ tilcdc_crtc->last_vblank = ktime_get();
tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE);
+ spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
drm_crtc_vblank_on(crtc);
@@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
}
drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
- tilcdc_crtc->last_vblank = 0;
tilcdc_crtc->enabled = false;
mutex_unlock(&tilcdc_crtc->enable_lock);
@@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
{
struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
struct drm_device *dev = crtc->dev;
- unsigned long flags;
WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
@@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
drm_framebuffer_reference(fb);
crtc->primary->fb = fb;
+ tilcdc_crtc->event = event;
- spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+ mutex_lock(&tilcdc_crtc->enable_lock);
- if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) {
+ if (tilcdc_crtc->enabled) {
+ unsigned long flags;
ktime_t next_vblank;
s64 tdiff;
- next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
- 1000000 / crtc->hwmode.vrefresh);
+ spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+ next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
+ 1000000 / crtc->hwmode.vrefresh);
tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get()));
if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US)
tilcdc_crtc->next_fb = fb;
- }
-
- if (tilcdc_crtc->next_fb != fb)
- set_scanout(crtc, fb);
+ else
+ set_scanout(crtc, fb);
- tilcdc_crtc->event = event;
+ spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+ }
- spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+ mutex_unlock(&tilcdc_crtc->enable_lock);
return 0;
}
@@ -1036,5 +1047,5 @@ int tilcdc_crtc_create(struct drm_device *dev)
fail:
tilcdc_crtc_destroy(crtc);
- return -ENOMEM;
+ return ret;
}
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 1aeb80e52424..8c54cb8f5d6d 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -175,11 +175,11 @@ config HID_CHERRY
Support for Cherry Cymotion keyboard.
config HID_CHICONY
- tristate "Chicony Tactical pad"
+ tristate "Chicony devices"
depends on HID
default !EXPERT
---help---
- Support for Chicony Tactical pad.
+ Support for Chicony Tactical pad and special keys on Chicony keyboards.
config HID_CORSAIR
tristate "Corsair devices"
@@ -190,6 +190,7 @@ config HID_CORSAIR
Supported devices:
- Vengeance K90
+ - Scimitar PRO RGB
config HID_PRODIKEYS
tristate "Prodikeys PC-MIDI Keyboard support"
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
index bc3cec199fee..f04ed9aabc3f 100644
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
{ }
};
MODULE_DEVICE_TABLE(hid, ch_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index e9e87d337446..3ceb4a2af381 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
@@ -1910,6 +1911,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c
index c0303f61c26a..9ba5d98a1180 100644
--- a/drivers/hid/hid-corsair.c
+++ b/drivers/hid/hid-corsair.c
@@ -3,8 +3,10 @@
*
* Supported devices:
* - Vengeance K90 Keyboard
+ * - Scimitar PRO RGB Gaming Mouse
*
* Copyright (c) 2015 Clement Vuchener
+ * Copyright (c) 2017 Oscar Campos
*/
/*
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev,
return 0;
}
+/*
+ * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is
+ * non parseable as they define two consecutive Logical Minimum for
+ * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16
+ * that should be obviousy 0x26 for Logical Magimum of 16 bits. This
+ * prevents poper parsing of the report descriptor due Logical
+ * Minimum being larger than Logical Maximum.
+ *
+ * This driver fixes the report descriptor for:
+ * - USB ID 1b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
+ */
+
+static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
+{
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+ if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+ /*
+ * Corsair Scimitar RGB Pro report descriptor is broken and
+ * defines two different Logical Minimum for the Consumer
+ * Application. The byte 77 should be a 0x26 defining a 16
+ * bits integer for the Logical Maximum but it is a 0x16
+ * instead (Logical Minimum)
+ */
+ switch (hdev->product) {
+ case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
+ if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
+ && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
+ hid_info(hdev, "Fixing up report descriptor\n");
+ rdesc[77] = 0x26;
+ }
+ break;
+ }
+
+ }
+ return rdesc;
+}
+
static const struct hid_device_id corsair_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
.driver_data = CORSAIR_USE_K90_MACRO |
CORSAIR_USE_K90_BACKLIGHT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
+ USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
{}
};
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = {
.event = corsair_event,
.remove = corsair_remove,
.input_mapping = corsair_input_mapping,
+ .report_fixup = corsair_mouse_report_fixup,
};
module_hid_driver(corsair_driver);
MODULE_LICENSE("GPL");
+/* Original K90 driver author */
MODULE_AUTHOR("Clement Vuchener");
+/* Scimitar PRO RGB driver author */
+MODULE_AUTHOR("Oscar Campos");
MODULE_DESCRIPTION("HID driver for Corsair devices");
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 86c95d30ac80..0e2e7c571d22 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -278,6 +278,9 @@
#define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13
#define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15
#define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17
+#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38
+#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39
+#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e
#define USB_VENDOR_ID_CREATIVELABS 0x041e
#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c
@@ -557,6 +560,7 @@
#define USB_VENDOR_ID_JESS 0x0c45
#define USB_DEVICE_ID_JESS_YUREX 0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
#define USB_VENDOR_ID_JESS2 0x0f30
#define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index f405b07d0381..740996f9bdd4 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2632,6 +2632,8 @@ err_stop:
sony_leds_remove(sc);
if (sc->quirks & SONY_BATTERY_SUPPORT)
sony_battery_remove(sc);
+ if (sc->touchpad)
+ sony_unregister_touchpad(sc);
sony_cancel_work_sync(sc);
kfree(sc->output_report_dmabuf);
sony_remove_dev_list(sc);
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index d6847a664446..a69a3c88ab29 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -80,6 +80,9 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index be8f7e2a026f..994bddc55b82 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev)
/* make sure we don't trigger the LEDs */
wacom_led_groups_release(wacom);
- wacom_release_resources(wacom);
+
+ if (wacom->wacom_wac.features.type != REMOTE)
+ wacom_release_resources(wacom);
hid_set_drvdata(hdev, NULL);
}
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 4aa3de9f1163..94250c293be2 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH);
input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL);
input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH);
- input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
- input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+ if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) {
+ input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
+ input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+ }
break;
case WACOM_HID_WD_FINGERWHEEL:
wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0);
@@ -4197,10 +4199,10 @@ static const struct wacom_features wacom_features_0x343 =
WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
static const struct wacom_features wacom_features_0x360 =
{ "Wacom Intuos Pro M", 44800, 29600, 8191, 63,
- INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+ INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
static const struct wacom_features wacom_features_0x361 =
{ "Wacom Intuos Pro L", 62200, 43200, 8191, 63,
- INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+ INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
static const struct wacom_features wacom_features_HID_ANY_ID =
{ "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index 05bbf171df37..f96601268f71 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -198,8 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = {
static int __init crossbar_of_init(struct device_node *node)
{
+ u32 max = 0, entry, reg_size;
int i, size, reserved = 0;
- u32 max = 0, entry;
const __be32 *irqsr;
int ret = -ENOMEM;
@@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node)
if (!cb->register_offsets)
goto err_irq_map;
- of_property_read_u32(node, "ti,reg-size", &size);
+ of_property_read_u32(node, "ti,reg-size", &reg_size);
- switch (size) {
+ switch (reg_size) {
case 1:
cb->write = crossbar_writeb;
break;
@@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node)
continue;
cb->register_offsets[i] = reserved;
- reserved += size;
+ reserved += reg_size;
}
of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 23201004fd7a..f77f840d2b5f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
}
+static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data)
+{
+ struct its_node *its = data;
+
+ /* On QDF2400, the size of the ITE is 16 bytes */
+ its->ite_size = 16;
+}
+
static const struct gic_quirk its_quirks[] = {
#ifdef CONFIG_CAVIUM_ERRATUM_22375
{
@@ -1618,6 +1626,14 @@ static const struct gic_quirk its_quirks[] = {
.init = its_enable_quirk_cavium_23144,
},
#endif
+#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065
+ {
+ .desc = "ITS: QDF2400 erratum 0065",
+ .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */
+ .mask = 0xffffffff,
+ .init = its_enable_quirk_qdf2400_e0065,
+ },
+#endif
{
}
};
diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index 11e13c56126f..2da3ff650e1d 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface,
return -ENODEV;
}
+ if (hostif->desc.bNumEndpoints < 1)
+ return -ENODEV;
+
dev_info(&udev->dev,
"%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n",
__func__, le16_to_cpu(udev->descriptor.idVendor),
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 3f041b187033..f757cef293f8 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -392,6 +392,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
* To get all the fields, copy all archdata
*/
dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
+ dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops;
#endif /* CONFIG_PCI */
#ifdef DEBUG
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f4ffd1eb8f44..dfb75979e455 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -989,26 +989,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
struct dm_offload *o = container_of(cb, struct dm_offload, cb);
struct bio_list list;
struct bio *bio;
+ int i;
INIT_LIST_HEAD(&o->cb.list);
if (unlikely(!current->bio_list))
return;
- list = *current->bio_list;
- bio_list_init(current->bio_list);
-
- while ((bio = bio_list_pop(&list))) {
- struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
- bio_list_add(current->bio_list, bio);
- continue;
+ for (i = 0; i < 2; i++) {
+ list = current->bio_list[i];
+ bio_list_init(&current->bio_list[i]);
+
+ while ((bio = bio_list_pop(&list))) {
+ struct bio_set *bs = bio->bi_pool;
+ if (unlikely(!bs) || bs == fs_bio_set) {
+ bio_list_add(&current->bio_list[i], bio);
+ continue;
+ }
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_add(&bs->rescue_list, bio);
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+ spin_unlock(&bs->rescue_lock);
}
-
- spin_lock(&bs->rescue_lock);
- bio_list_add(&bs->rescue_list, bio);
- queue_work(bs->rescue_workqueue, &bs->rescue_work);
- spin_unlock(&bs->rescue_lock);
}
}
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2b13117fb918..321ecac23027 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
bm_lockres->flags |= DLM_LKF_NOQUEUE;
ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (ret == -EAGAIN) {
- memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
s = read_resync_info(mddev, bm_lockres);
if (s) {
pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
@@ -974,6 +973,7 @@ static int leave(struct mddev *mddev)
lockres_free(cinfo->bitmap_lockres);
unlock_all_bitmaps(mddev);
dlm_release_lockspace(cinfo->lockspace, 2);
+ kfree(cinfo);
return 0;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 548d1b8014f8..f6ae1d67bcd0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
}
EXPORT_SYMBOL(md_flush_request);
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
- struct mddev *mddev = cb->data;
- md_wakeup_thread(mddev->thread);
- kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
static inline struct mddev *mddev_get(struct mddev *mddev)
{
atomic_inc(&mddev->active);
@@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
- sb->super_offset = rdev->sb_start;
+ sb->super_offset = cpu_to_le64(rdev->sb_start);
sb->sb_csum = calc_sb_1_csum(sb);
do {
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
/* Check if any mddev parameters have changed */
if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
(mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
- (mddev->layout != le64_to_cpu(sb->layout)) ||
+ (mddev->layout != le32_to_cpu(sb->layout)) ||
(mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
(mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
return true;
@@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->layout = info->layout;
mddev->chunk_sectors = info->chunk_size >> 9;
- mddev->max_disks = MD_SB_DISKS;
-
if (mddev->persistent) {
- mddev->flags = 0;
- mddev->sb_flags = 0;
+ mddev->max_disks = MD_SB_DISKS;
+ mddev->flags = 0;
+ mddev->sb_flags = 0;
}
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
return -ENOSPC;
}
rv = mddev->pers->resize(mddev, num_sectors);
- if (!rv)
- revalidate_disk(mddev->gendisk);
+ if (!rv) {
+ if (mddev->queue) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ }
+ }
return rv;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b8859cbf84b6..dde8ecb760c8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev);
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
- return !!blk_check_plugged(md_unplug, mddev,
- sizeof(struct blk_plug_cb));
-}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fbc2d7851b49..a34f58772022 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
static void freeze_array(struct r1conf *conf, int extra)
{
/* Stop sync I/O and normal I/O and wait for everything to
- * go quite.
+ * go quiet.
* This is called in two situations:
* 1) management command handlers (reshape, remove disk, quiesce).
* 2) one normal I/O request failed.
@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
- if (bio_data_dir(split) == READ)
+ if (bio_data_dir(split) == READ) {
raid1_read_request(mddev, split);
- else
+
+ /*
+ * If a bio is split, the first part of bio will
+ * pass barrier but the bio is queued in
+ * current->bio_list (see generic_make_request). If
+ * there is a raise_barrier() called here, the second
+ * part of bio can't pass barrier. But since the first
+ * part bio isn't dispatched to underlying disks yet,
+ * the barrier is never released, hence raise_barrier
+ * will always wait. We have a deadlock.
+ * Note, this only happens in read path. For write
+ * path, the first part of bio is dispatched in a
+ * schedule() call (because of blk plug) or offloaded
+ * to raid1d.
+ * Quitting from the function immediately can change
+ * the bio order queued in bio_list and avoid the deadlock.
+ */
+ if (split != bio) {
+ generic_make_request(bio);
+ break;
+ }
+ } else
raid1_write_request(mddev, split);
} while (split != bio);
}
@@ -3246,8 +3267,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, newsize);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 063c43d83b72..e89a8d78a9ed 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
!conf->barrier ||
(atomic_read(&conf->nr_pending) &&
current->bio_list &&
- !bio_list_empty(current->bio_list)),
+ (!bio_list_empty(&current->bio_list[0]) ||
+ !bio_list_empty(&current->bio_list[1]))),
conf->resync_lock);
conf->nr_waiting--;
if (!conf->nr_waiting)
@@ -1477,11 +1478,24 @@ retry_write:
mbio->bi_bdev = (void*)rdev;
atomic_inc(&r10_bio->remaining);
+
+ cb = blk_check_plugged(raid10_unplug, mddev,
+ sizeof(*plug));
+ if (cb)
+ plug = container_of(cb, struct raid10_plug_cb,
+ cb);
+ else
+ plug = NULL;
spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
+ if (plug) {
+ bio_list_add(&plug->pending, mbio);
+ plug->pending_cnt++;
+ } else {
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ }
spin_unlock_irqrestore(&conf->device_lock, flags);
- if (!mddev_check_plugged(mddev))
+ if (!plug)
md_wakeup_thread(mddev->thread);
}
}
@@ -1571,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
+ /*
+ * If a bio is split, the first part of bio will pass
+ * barrier but the bio is queued in current->bio_list (see
+ * generic_make_request). If there is a raise_barrier() called
+ * here, the second part of bio can't pass barrier. But since
+ * the first part bio isn't dispatched to underlying disks
+ * yet, the barrier is never released, hence raise_barrier will
+ * always wait. We have a deadlock.
+ * Note, this only happens in read path. For write path, the
+ * first part of bio is dispatched in a schedule() call
+ * (because of blk plug) or offloaded to raid10d.
+ * Quitting from the function immediately can change the bio
+ * order queued in bio_list and avoid the deadlock.
+ */
__make_request(mddev, split);
+ if (split != bio && bio_data_dir(bio) == READ) {
+ generic_make_request(bio);
+ break;
+ }
} while (split != bio);
/* In case raid10d snuck in to freeze_array */
@@ -3943,10 +3975,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, size);
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > oldsize) {
mddev->recovery_cp = oldsize;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4fb09b3fcb41..ed5cd705b985 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
(test_bit(R5_Wantdrain, &dev->flags) ||
test_bit(R5_InJournal, &dev->flags))) ||
(srctype == SYNDROME_SRC_WRITTEN &&
- dev->written)) {
+ (dev->written ||
+ test_bit(R5_InJournal, &dev->flags)))) {
if (test_bit(R5_InJournal, &dev->flags))
srcs[slot] = sh->dev[i].orig_page;
else
@@ -7605,8 +7606,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, newsize);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 248f60d171a5..ffea9859f5a7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -2272,10 +2272,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
processed = xgbe_rx_poll(channel, budget);
/* If we processed everything, we are done */
- if (processed < budget) {
- /* Turn off polling */
- napi_complete_done(napi, processed);
-
+ if ((processed < budget) && napi_complete_done(napi, processed)) {
/* Enable Tx and Rx interrupts */
if (pdata->channel_irq_mode)
xgbe_enable_rx_tx_int(pdata, channel);
@@ -2317,10 +2314,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
} while ((processed < budget) && (processed != last_processed));
/* If we processed everything, we are done */
- if (processed < budget) {
- /* Turn off polling */
- napi_complete_done(napi, processed);
-
+ if ((processed < budget) && napi_complete_done(napi, processed)) {
/* Enable Tx and Rx interrupts */
xgbe_enable_rx_tx_ints(pdata);
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 581de71a958a..4c6c882c6a1c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -213,9 +213,9 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self)
if (!((1U << i) & self->msix_entry_mask))
continue;
- free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
if (pdev->msix_enabled)
irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
+ free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
self->msix_entry_mask &= ~(1U << i);
}
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index d8d06fdfc42b..ac76fc251d26 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13292,17 +13292,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
- /* VF with OLD Hypervisor or old PF do not support filtering */
if (IS_PF(bp)) {
if (chip_is_e1x)
bp->accept_any_vlan = true;
else
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#ifdef CONFIG_BNX2X_SRIOV
- } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#endif
}
+ /* For VF we'll know whether to enable VLAN filtering after
+ * getting a response to CHANNEL_TLV_ACQUIRE from PF.
+ */
dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX;
dev->features |= NETIF_F_HIGHDMA;
@@ -13738,7 +13736,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
if (!netif_running(bp->dev)) {
DP(BNX2X_MSG_PTP,
"PTP adjfreq called while the interface is down\n");
- return -EFAULT;
+ return -ENETDOWN;
}
if (ppb < 0) {
@@ -13797,6 +13795,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP adjtime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
timecounter_adjtime(&bp->timecounter, delta);
@@ -13809,6 +13813,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP gettime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timecounter_read(&bp->timecounter);
DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
@@ -13824,6 +13834,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp,
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP settime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timespec64_to_ns(ts);
DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
@@ -13991,6 +14007,14 @@ static int bnx2x_init_one(struct pci_dev *pdev,
rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
if (rc)
goto init_one_freemem;
+
+#ifdef CONFIG_BNX2X_SRIOV
+ /* VF with OLD Hypervisor or old PF do not support filtering */
+ if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
+ dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+#endif
}
/* Enable SRIOV if capability found in configuration space */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 6fad22adbbb9..bdfd53b46bc5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
/* Add/Remove the filter */
rc = bnx2x_config_vlan_mac(bp, &ramrod);
- if (rc && rc != -EEXIST) {
+ if (rc == -EEXIST)
+ return 0;
+ if (rc) {
BNX2X_ERR("Failed to %s %s\n",
filter->add ? "add" : "delete",
(filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
@@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
return rc;
}
+ filter->applied = true;
+
return 0;
}
@@ -469,8 +473,10 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf,
/* Rollback if needed */
if (i != filters->count) {
BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
- i, filters->count + 1);
+ i, filters->count);
while (--i >= 0) {
+ if (!filters->filters[i].applied)
+ continue;
filters->filters[i].add = !filters->filters[i].add;
bnx2x_vf_mac_vlan_config(bp, vf, qid,
&filters->filters[i],
@@ -1899,7 +1905,8 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
continue;
}
- DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid);
+ DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+ "add addresses for vf %d\n", vf->abs_vfid);
for_each_vfq(vf, j) {
struct bnx2x_vf_queue *rxq = vfq_get(vf, j);
@@ -1920,11 +1927,12 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
cpu_to_le32(U64_HI(q_stats_addr));
cur_query_entry->address.lo =
cpu_to_le32(U64_LO(q_stats_addr));
- DP(BNX2X_MSG_IOV,
- "added address %x %x for vf %d queue %d client %d\n",
- cur_query_entry->address.hi,
- cur_query_entry->address.lo, cur_query_entry->funcID,
- j, cur_query_entry->index);
+ DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+ "added address %x %x for vf %d queue %d client %d\n",
+ cur_query_entry->address.hi,
+ cur_query_entry->address.lo,
+ cur_query_entry->funcID,
+ j, cur_query_entry->index);
cur_query_entry++;
cur_data_offset += sizeof(struct per_queue_stats);
stats_count++;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 7a6d406f4c11..888d0b6632e8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter {
(BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
bool add;
+ bool applied;
u8 *mac;
u16 vid;
};
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index bfae300cf25f..76a4668c50fe 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
struct bnx2x *bp = netdev_priv(dev);
struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
- int rc, i = 0;
+ int rc = 0, i = 0;
struct netdev_hw_addr *ha;
if (bp->state != BNX2X_STATE_OPEN) {
@@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
/* Get Rx mode requested */
DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
+ /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+ if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+ DP(NETIF_MSG_IFUP,
+ "VF supports not more than %d multicast MAC addresses\n",
+ PFVF_MAX_MULTICAST_PER_VF);
+ rc = -EINVAL;
+ goto out;
+ }
+
netdev_for_each_mc_addr(ha, dev) {
DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
bnx2x_mc_addr(ha));
@@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
i++;
}
- /* We support four PFVF_MAX_MULTICAST_PER_VF mcast
- * addresses tops
- */
- if (i >= PFVF_MAX_MULTICAST_PER_VF) {
- DP(NETIF_MSG_IFUP,
- "VF supports not more than %d multicast MAC addresses\n",
- PFVF_MAX_MULTICAST_PER_VF);
- return -EINVAL;
- }
-
req->n_multicast = i;
req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
req->vf_qid = 0;
@@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
out:
bnx2x_vfpf_finalize(bp, &req->first_tlv);
- return 0;
+ return rc;
}
/* request pf to add a vlan for the vf */
@@ -1778,6 +1777,23 @@ static int bnx2x_vf_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf)
goto op_err;
}
+ /* build vlan list */
+ fl = NULL;
+
+ rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+ VFPF_VLAN_FILTER);
+ if (rc)
+ goto op_err;
+
+ if (fl) {
+ /* set vlan list */
+ rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl,
+ msg->vf_qid,
+ false);
+ if (rc)
+ goto op_err;
+ }
+
}
if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 235733e91c79..32de4589d16a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4465,6 +4465,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
}
#endif
+ if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) &
+ FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED))
+ bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+
switch (resp->port_partition_type) {
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
@@ -5507,8 +5511,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
}
- link_info->support_auto_speeds =
- le16_to_cpu(resp->supported_speeds_auto_mode);
+ if (resp->supported_speeds_auto_mode)
+ link_info->support_auto_speeds =
+ le16_to_cpu(resp->supported_speeds_auto_mode);
hwrm_phy_qcaps_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -6495,8 +6500,14 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
if (!silent)
bnxt_dbg_dump_states(bp);
if (netif_running(bp->dev)) {
+ int rc;
+
+ if (!silent)
+ bnxt_ulp_stop(bp);
bnxt_close_nic(bp, false, false);
- bnxt_open_nic(bp, false, false);
+ rc = bnxt_open_nic(bp, false, false);
+ if (!silent && !rc)
+ bnxt_ulp_start(bp);
}
}
@@ -7444,6 +7455,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_pci_clean;
+ rc = bnxt_hwrm_func_reset(bp);
+ if (rc)
+ goto init_err_pci_clean;
+
bnxt_hwrm_fw_set_time(bp);
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
@@ -7554,10 +7569,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_pci_clean;
- rc = bnxt_hwrm_func_reset(bp);
- if (rc)
- goto init_err_pci_clean;
-
rc = bnxt_init_int_mode(bp);
if (rc)
goto init_err_pci_clean;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index faf26a2f726b..c7a5b84a5cb2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -993,6 +993,7 @@ struct bnxt {
BNXT_FLAG_ROCEV2_CAP)
#define BNXT_FLAG_NO_AGG_RINGS 0x20000
#define BNXT_FLAG_RX_PAGE_MODE 0x40000
+ #define BNXT_FLAG_FW_LLDP_AGENT 0x80000
#define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index fdf2d8caf7bf..03532061d211 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -474,7 +474,7 @@ void bnxt_dcb_init(struct bnxt *bp)
return;
bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
- if (BNXT_PF(bp))
+ if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
else
bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f92896835d2a..69015fa50f20 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1,7 +1,7 @@
/*
* Broadcom GENET (Gigabit Ethernet) controller driver
*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -450,6 +450,22 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
genet_dma_ring_regs[r]);
}
+static int bcmgenet_begin(struct net_device *dev)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+ /* Turn on the clock */
+ return clk_prepare_enable(priv->clk);
+}
+
+static void bcmgenet_complete(struct net_device *dev)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+ /* Turn off the clock */
+ clk_disable_unprepare(priv->clk);
+}
+
static int bcmgenet_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
@@ -778,8 +794,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
/* Misc UniMAC counters */
STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
- UMAC_RBUF_OVFL_CNT),
- STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+ UMAC_RBUF_OVFL_CNT_V1),
+ STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+ UMAC_RBUF_ERR_CNT_V1),
STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
@@ -821,6 +838,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset,
}
}
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+ u16 new_offset;
+ u32 val;
+
+ switch (offset) {
+ case UMAC_RBUF_OVFL_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_OVFL_CNT_V2;
+ else
+ new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ case UMAC_RBUF_ERR_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_ERR_CNT_V2;
+ else
+ new_offset = RBUF_ERR_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ default:
+ val = bcmgenet_umac_readl(priv, offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0, offset);
+ break;
+ }
+
+ return val;
+}
+
static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
{
int i, j = 0;
@@ -836,19 +892,28 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
case BCMGENET_STAT_NETDEV:
case BCMGENET_STAT_SOFT:
continue;
- case BCMGENET_STAT_MIB_RX:
- case BCMGENET_STAT_MIB_TX:
case BCMGENET_STAT_RUNT:
- if (s->type != BCMGENET_STAT_MIB_RX)
- offset = BCMGENET_STAT_OFFSET;
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_TX:
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_RX:
val = bcmgenet_umac_readl(priv,
UMAC_MIB_START + j + offset);
+ offset = 0; /* Reset Offset */
break;
case BCMGENET_STAT_MISC:
- val = bcmgenet_umac_readl(priv, s->reg_offset);
- /* clear if overflowed */
- if (val == ~0)
- bcmgenet_umac_writel(priv, 0, s->reg_offset);
+ if (GENET_IS_V1(priv)) {
+ val = bcmgenet_umac_readl(priv, s->reg_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0,
+ s->reg_offset);
+ } else {
+ val = bcmgenet_update_stat_misc(priv,
+ s->reg_offset);
+ }
break;
}
@@ -973,6 +1038,8 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
/* standard ethtool support functions. */
static const struct ethtool_ops bcmgenet_ethtool_ops = {
+ .begin = bcmgenet_begin,
+ .complete = bcmgenet_complete,
.get_strings = bcmgenet_get_strings,
.get_sset_count = bcmgenet_get_sset_count,
.get_ethtool_stats = bcmgenet_get_ethtool_stats,
@@ -1167,7 +1234,6 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
struct bcmgenet_priv *priv = netdev_priv(dev);
struct device *kdev = &priv->pdev->dev;
struct enet_cb *tx_cb_ptr;
- struct netdev_queue *txq;
unsigned int pkts_compl = 0;
unsigned int bytes_compl = 0;
unsigned int c_index;
@@ -1219,13 +1285,8 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
dev->stats.tx_packets += pkts_compl;
dev->stats.tx_bytes += bytes_compl;
- txq = netdev_get_tx_queue(dev, ring->queue);
- netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
-
- if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
- if (netif_tx_queue_stopped(txq))
- netif_tx_wake_queue(txq);
- }
+ netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue),
+ pkts_compl, bytes_compl);
return pkts_compl;
}
@@ -1248,8 +1309,16 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
struct bcmgenet_tx_ring *ring =
container_of(napi, struct bcmgenet_tx_ring, napi);
unsigned int work_done = 0;
+ struct netdev_queue *txq;
+ unsigned long flags;
- work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
+ spin_lock_irqsave(&ring->lock, flags);
+ work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring);
+ if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
+ txq = netdev_get_tx_queue(ring->priv->dev, ring->queue);
+ netif_tx_wake_queue(txq);
+ }
+ spin_unlock_irqrestore(&ring->lock, flags);
if (work_done == 0) {
napi_complete(napi);
@@ -2457,24 +2526,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
/* Interrupt bottom half */
static void bcmgenet_irq_task(struct work_struct *work)
{
+ unsigned long flags;
+ unsigned int status;
struct bcmgenet_priv *priv = container_of(
work, struct bcmgenet_priv, bcmgenet_irq_work);
netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
- if (priv->irq0_stat & UMAC_IRQ_MPD_R) {
- priv->irq0_stat &= ~UMAC_IRQ_MPD_R;
+ spin_lock_irqsave(&priv->lock, flags);
+ status = priv->irq0_stat;
+ priv->irq0_stat = 0;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (status & UMAC_IRQ_MPD_R) {
netif_dbg(priv, wol, priv->dev,
"magic packet detected, waking up\n");
bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
}
/* Link UP/DOWN event */
- if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
+ if (status & UMAC_IRQ_LINK_EVENT)
phy_mac_interrupt(priv->phydev,
- !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
- priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
- }
+ !!(status & UMAC_IRQ_LINK_UP));
}
/* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2483,22 +2556,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
struct bcmgenet_priv *priv = dev_id;
struct bcmgenet_rx_ring *rx_ring;
struct bcmgenet_tx_ring *tx_ring;
- unsigned int index;
+ unsigned int index, status;
- /* Save irq status for bottom-half processing. */
- priv->irq1_stat =
- bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
+ /* Read irq status */
+ status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
- bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
+ bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
- "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+ "%s: IRQ=0x%x\n", __func__, status);
/* Check Rx priority queue interrupts */
for (index = 0; index < priv->hw_params->rx_queues; index++) {
- if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+ if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
continue;
rx_ring = &priv->rx_rings[index];
@@ -2511,7 +2583,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
/* Check Tx priority queue interrupts */
for (index = 0; index < priv->hw_params->tx_queues; index++) {
- if (!(priv->irq1_stat & BIT(index)))
+ if (!(status & BIT(index)))
continue;
tx_ring = &priv->tx_rings[index];
@@ -2531,19 +2603,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
struct bcmgenet_priv *priv = dev_id;
struct bcmgenet_rx_ring *rx_ring;
struct bcmgenet_tx_ring *tx_ring;
+ unsigned int status;
+ unsigned long flags;
- /* Save irq status for bottom-half processing. */
- priv->irq0_stat =
- bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
+ /* Read irq status */
+ status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
- bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+ bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
- "IRQ=0x%x\n", priv->irq0_stat);
+ "IRQ=0x%x\n", status);
- if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) {
+ if (status & UMAC_IRQ_RXDMA_DONE) {
rx_ring = &priv->rx_rings[DESC_INDEX];
if (likely(napi_schedule_prep(&rx_ring->napi))) {
@@ -2552,7 +2625,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
}
}
- if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) {
+ if (status & UMAC_IRQ_TXDMA_DONE) {
tx_ring = &priv->tx_rings[DESC_INDEX];
if (likely(napi_schedule_prep(&tx_ring->napi))) {
@@ -2561,22 +2634,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
}
}
- if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
- UMAC_IRQ_PHY_DET_F |
- UMAC_IRQ_LINK_EVENT |
- UMAC_IRQ_HFB_SM |
- UMAC_IRQ_HFB_MM |
- UMAC_IRQ_MPD_R)) {
- /* all other interested interrupts handled in bottom half */
- schedule_work(&priv->bcmgenet_irq_work);
- }
-
if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
- priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
- priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+ status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
wake_up(&priv->wq);
}
+ /* all other interested interrupts handled in bottom half */
+ status &= (UMAC_IRQ_LINK_EVENT |
+ UMAC_IRQ_MPD_R);
+ if (status) {
+ /* Save irq status for bottom-half processing. */
+ spin_lock_irqsave(&priv->lock, flags);
+ priv->irq0_stat |= status;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ schedule_work(&priv->bcmgenet_irq_work);
+ }
+
return IRQ_HANDLED;
}
@@ -2801,6 +2875,8 @@ err_irq0:
err_fini_dma:
bcmgenet_fini_dma(priv);
err_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
@@ -3177,6 +3253,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
*/
gphy_rev = reg & 0xffff;
+ /* These revision values are reserved: warn and bail out early */
+ if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+ pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+ return;
+ }
+
/* This is the good old scheme, just GPHY major, no minor nor patch */
if ((gphy_rev & 0xf0) != 0)
priv->gphy_rev = gphy_rev << 8;
@@ -3185,12 +3267,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
else if ((gphy_rev & 0xff00) != 0)
priv->gphy_rev = gphy_rev;
- /* This is reserved so should require special treatment */
- else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
- pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
- return;
- }
-
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (!(params->flags & GENET_HAS_40BITS))
pr_warn("GENET does not support 40-bits PA\n");
@@ -3233,6 +3309,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
const void *macaddr;
struct resource *r;
int err = -EIO;
+ const char *phy_mode_str;
/* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
@@ -3276,6 +3353,8 @@ static int bcmgenet_probe(struct platform_device *pdev)
goto err;
}
+ spin_lock_init(&priv->lock);
+
SET_NETDEV_DEV(dev, &pdev->dev);
dev_set_drvdata(&pdev->dev, dev);
ether_addr_copy(dev->dev_addr, macaddr);
@@ -3338,6 +3417,13 @@ static int bcmgenet_probe(struct platform_device *pdev)
priv->clk_eee = NULL;
}
+ /* If this is an internal GPHY, power it on now, before UniMAC is
+ * brought out of reset as absolutely no UniMAC activity is allowed
+ */
+ if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+ !strcasecmp(phy_mode_str, "internal"))
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
err = reset_umac(priv);
if (err)
goto err_clk_disable;
@@ -3502,6 +3588,8 @@ static int bcmgenet_resume(struct device *d)
return 0;
out_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 1e2dc34d331a..db7f289d65ae 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -214,7 +214,9 @@ struct bcmgenet_mib_counters {
#define MDIO_REG_SHIFT 16
#define MDIO_REG_MASK 0x1F
-#define UMAC_RBUF_OVFL_CNT 0x61C
+#define UMAC_RBUF_OVFL_CNT_V1 0x61C
+#define RBUF_OVFL_CNT_V2 0x80
+#define RBUF_OVFL_CNT_V3PLUS 0x94
#define UMAC_MPD_CTRL 0x620
#define MPD_EN (1 << 0)
@@ -224,7 +226,9 @@ struct bcmgenet_mib_counters {
#define UMAC_MPD_PW_MS 0x624
#define UMAC_MPD_PW_LS 0x628
-#define UMAC_RBUF_ERR_CNT 0x634
+#define UMAC_RBUF_ERR_CNT_V1 0x634
+#define RBUF_ERR_CNT_V2 0x84
+#define RBUF_ERR_CNT_V3PLUS 0x98
#define UMAC_MDF_ERR_CNT 0x638
#define UMAC_MDF_CTRL 0x650
#define UMAC_MDF_ADDR 0x654
@@ -619,11 +623,13 @@ struct bcmgenet_priv {
struct work_struct bcmgenet_irq_work;
int irq0;
int irq1;
- unsigned int irq0_stat;
- unsigned int irq1_stat;
int wol_irq;
bool wol_irq_disabled;
+ /* shared status */
+ spinlock_t lock;
+ unsigned int irq0_stat;
+
/* HW descriptors/checksum variables */
bool desc_64b_en;
bool desc_rxchk_en;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index be9c0e3f5ade..92f46b1375c3 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -152,7 +152,7 @@ struct octnic_gather {
*/
struct octeon_sg_entry *sg;
- u64 sg_dma_ptr;
+ dma_addr_t sg_dma_ptr;
};
struct handshake {
@@ -734,6 +734,9 @@ static void delete_glists(struct lio *lio)
struct octnic_gather *g;
int i;
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+
if (!lio->glist)
return;
@@ -741,23 +744,26 @@ static void delete_glists(struct lio *lio)
do {
g = (struct octnic_gather *)
list_delete_head(&lio->glist[i]);
- if (g) {
- if (g->sg) {
- dma_unmap_single(&lio->oct_dev->
- pci_dev->dev,
- g->sg_dma_ptr,
- g->sg_size,
- DMA_TO_DEVICE);
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
- }
+ if (g)
kfree(g);
- }
} while (g);
+
+ if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+ lio_dma_free(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ lio->glists_virt_base[i],
+ lio->glists_dma_base[i]);
+ }
}
- kfree((void *)lio->glist);
- kfree((void *)lio->glist_lock);
+ kfree(lio->glists_virt_base);
+ lio->glists_virt_base = NULL;
+
+ kfree(lio->glists_dma_base);
+ lio->glists_dma_base = NULL;
+
+ kfree(lio->glist);
+ lio->glist = NULL;
}
/**
@@ -772,13 +778,30 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
GFP_KERNEL);
if (!lio->glist_lock)
- return 1;
+ return -ENOMEM;
lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
GFP_KERNEL);
if (!lio->glist) {
- kfree((void *)lio->glist_lock);
- return 1;
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+ return -ENOMEM;
+ }
+
+ lio->glist_entry_size =
+ ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+ /* Allocate the arrays that record the virtual and DMA base address
+ * of each glist's DMA-coherent memory block
+ */
+ lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+ GFP_KERNEL);
+ lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+ GFP_KERNEL);
+
+ if (!lio->glists_virt_base || !lio->glists_dma_base) {
+ delete_glists(lio);
+ return -ENOMEM;
}
for (i = 0; i < num_iqs; i++) {
@@ -788,6 +811,16 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
INIT_LIST_HEAD(&lio->glist[i]);
+ lio->glists_virt_base[i] =
+ lio_dma_alloc(oct,
+ lio->glist_entry_size * lio->tx_qsize,
+ &lio->glists_dma_base[i]);
+
+ if (!lio->glists_virt_base[i]) {
+ delete_glists(lio);
+ return -ENOMEM;
+ }
+
for (j = 0; j < lio->tx_qsize; j++) {
g = kzalloc_node(sizeof(*g), GFP_KERNEL,
numa_node);
@@ -796,43 +829,18 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
if (!g)
break;
- g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
- OCT_SG_ENTRY_SIZE);
+ g->sg = lio->glists_virt_base[i] +
+ (j * lio->glist_entry_size);
- g->sg = kmalloc_node(g->sg_size + 8,
- GFP_KERNEL, numa_node);
- if (!g->sg)
- g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
- if (!g->sg) {
- kfree(g);
- break;
- }
-
- /* The gather component should be aligned on 64-bit
- * boundary
- */
- if (((unsigned long)g->sg) & 7) {
- g->adjust = 8 - (((unsigned long)g->sg) & 7);
- g->sg = (struct octeon_sg_entry *)
- ((unsigned long)g->sg + g->adjust);
- }
- g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
- g->sg, g->sg_size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev,
- g->sg_dma_ptr)) {
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
- kfree(g);
- break;
- }
+ g->sg_dma_ptr = lio->glists_dma_base[i] +
+ (j * lio->glist_entry_size);
list_add_tail(&g->list, &lio->glist[i]);
}
if (j != lio->tx_qsize) {
delete_glists(lio);
- return 1;
+ return -ENOMEM;
}
}
@@ -1885,9 +1893,6 @@ static void free_netsgbuf(void *buf)
i++;
}
- dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
- g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
list_add_tail(&g->list, &lio->glist[iq]);
@@ -1933,9 +1938,6 @@ static void free_netsgbuf_with_resp(void *buf)
i++;
}
- dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
- g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
@@ -3273,8 +3275,6 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
i++;
}
- dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
- g->sg_size, DMA_TO_DEVICE);
dptr = g->sg_dma_ptr;
if (OCTEON_CN23XX_PF(oct))
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 9d5e03502c76..7b83be4ce1fe 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -108,6 +108,8 @@ struct octnic_gather {
* received from the IP layer.
*/
struct octeon_sg_entry *sg;
+
+ dma_addr_t sg_dma_ptr;
};
struct octeon_device_priv {
@@ -490,6 +492,9 @@ static void delete_glists(struct lio *lio)
struct octnic_gather *g;
int i;
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+
if (!lio->glist)
return;
@@ -497,17 +502,26 @@ static void delete_glists(struct lio *lio)
do {
g = (struct octnic_gather *)
list_delete_head(&lio->glist[i]);
- if (g) {
- if (g->sg)
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
+ if (g)
kfree(g);
- }
} while (g);
+
+ if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+ lio_dma_free(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ lio->glists_virt_base[i],
+ lio->glists_dma_base[i]);
+ }
}
+ kfree(lio->glists_virt_base);
+ lio->glists_virt_base = NULL;
+
+ kfree(lio->glists_dma_base);
+ lio->glists_dma_base = NULL;
+
kfree(lio->glist);
- kfree(lio->glist_lock);
+ lio->glist = NULL;
}
/**
@@ -522,13 +536,30 @@ static int setup_glists(struct lio *lio, int num_iqs)
lio->glist_lock =
kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
if (!lio->glist_lock)
- return 1;
+ return -ENOMEM;
lio->glist =
kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
if (!lio->glist) {
kfree(lio->glist_lock);
- return 1;
+ lio->glist_lock = NULL;
+ return -ENOMEM;
+ }
+
+ lio->glist_entry_size =
+ ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+ /* Allocate the arrays that record the virtual and DMA base address
+ * of each glist's DMA-coherent memory block
+ */
+ lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+ GFP_KERNEL);
+ lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+ GFP_KERNEL);
+
+ if (!lio->glists_virt_base || !lio->glists_dma_base) {
+ delete_glists(lio);
+ return -ENOMEM;
}
for (i = 0; i < num_iqs; i++) {
@@ -536,34 +567,33 @@ static int setup_glists(struct lio *lio, int num_iqs)
INIT_LIST_HEAD(&lio->glist[i]);
+ lio->glists_virt_base[i] =
+ lio_dma_alloc(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ &lio->glists_dma_base[i]);
+
+ if (!lio->glists_virt_base[i]) {
+ delete_glists(lio);
+ return -ENOMEM;
+ }
+
for (j = 0; j < lio->tx_qsize; j++) {
g = kzalloc(sizeof(*g), GFP_KERNEL);
if (!g)
break;
- g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
- OCT_SG_ENTRY_SIZE);
+ g->sg = lio->glists_virt_base[i] +
+ (j * lio->glist_entry_size);
- g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
- if (!g->sg) {
- kfree(g);
- break;
- }
+ g->sg_dma_ptr = lio->glists_dma_base[i] +
+ (j * lio->glist_entry_size);
- /* The gather component should be aligned on 64-bit
- * boundary
- */
- if (((unsigned long)g->sg) & 7) {
- g->adjust = 8 - (((unsigned long)g->sg) & 7);
- g->sg = (struct octeon_sg_entry *)
- ((unsigned long)g->sg + g->adjust);
- }
list_add_tail(&g->list, &lio->glist[i]);
}
if (j != lio->tx_qsize) {
delete_glists(lio);
- return 1;
+ return -ENOMEM;
}
}
@@ -1324,10 +1354,6 @@ static void free_netsgbuf(void *buf)
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
@@ -1374,10 +1400,6 @@ static void free_netsgbuf_with_resp(void *buf)
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
@@ -2382,23 +2404,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
i++;
}
- dptr = dma_map_single(&oct->pci_dev->dev,
- g->sg, g->sg_size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
- dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
- __func__);
- dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
- skb->len - skb->data_len,
- DMA_TO_DEVICE);
- for (j = 1; j <= frags; j++) {
- frag = &skb_shinfo(skb)->frags[j - 1];
- dma_unmap_page(&oct->pci_dev->dev,
- g->sg[j >> 2].ptr[j & 3],
- frag->size, DMA_TO_DEVICE);
- }
- return NETDEV_TX_BUSY;
- }
+ dptr = g->sg_dma_ptr;
ndata.cmd.cmd3.dptr = dptr;
finfo->dptr = dptr;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index b3dc2e9651a8..d29ebc531151 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -71,17 +71,17 @@
#define CN23XX_MAX_RINGS_PER_VF 8
#define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
-#define CN23XX_MAX_IQ_DESCRIPTORS 2048
+#define CN23XX_MAX_IQ_DESCRIPTORS 512
#define CN23XX_DB_MIN 1
#define CN23XX_DB_MAX 8
#define CN23XX_DB_TIMEOUT 1
#define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
-#define CN23XX_MAX_OQ_DESCRIPTORS 2048
+#define CN23XX_MAX_OQ_DESCRIPTORS 512
#define CN23XX_OQ_BUF_SIZE 1536
#define CN23XX_OQ_PKTSPER_INTR 128
/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
-#define CN23XX_OQ_REFIL_THRESHOLD 128
+#define CN23XX_OQ_REFIL_THRESHOLD 16
#define CN23XX_OQ_INTR_PKT 64
#define CN23XX_OQ_INTR_TIME 100
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 0be87d119a97..79f809479af6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -155,11 +155,6 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
recv_buffer_destroy(droq->recv_buf_list[i].buffer,
pg_info);
- if (droq->desc_ring && droq->desc_ring[i].info_ptr)
- lio_unmap_ring_info(oct->pci_dev,
- (u64)droq->
- desc_ring[i].info_ptr,
- OCT_DROQ_INFO_SIZE);
droq->recv_buf_list[i].buffer = NULL;
}
@@ -211,10 +206,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
vfree(droq->recv_buf_list);
if (droq->info_base_addr)
- cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
- droq->info_alloc_size,
- droq->info_base_addr,
- droq->info_list_dma);
+ lio_free_info_buffer(oct, droq);
if (droq->desc_ring)
lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
@@ -294,12 +286,7 @@ int octeon_init_droq(struct octeon_device *oct,
dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no,
droq->max_count);
- droq->info_list =
- cnnic_numa_alloc_aligned_dma((droq->max_count *
- OCT_DROQ_INFO_SIZE),
- &droq->info_alloc_size,
- &droq->info_base_addr,
- numa_node);
+ droq->info_list = lio_alloc_info_buffer(oct, droq);
if (!droq->info_list) {
dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index e62074090681..6982c0af5ecc 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -325,10 +325,10 @@ struct octeon_droq {
size_t desc_ring_dma;
/** Info ptr list are allocated at this virtual address. */
- size_t info_base_addr;
+ void *info_base_addr;
/** DMA mapped address of the info list */
- size_t info_list_dma;
+ dma_addr_t info_list_dma;
/** Allocated size of info list. */
u32 info_alloc_size;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index aa36e9ae7676..bed9ef17bc26 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -140,48 +140,6 @@ err_release_region:
return 1;
}
-static inline void *
-cnnic_numa_alloc_aligned_dma(u32 size,
- u32 *alloc_size,
- size_t *orig_ptr,
- int numa_node)
-{
- int retries = 0;
- void *ptr = NULL;
-
-#define OCTEON_MAX_ALLOC_RETRIES 1
- do {
- struct page *page = NULL;
-
- page = alloc_pages_node(numa_node,
- GFP_KERNEL,
- get_order(size));
- if (!page)
- page = alloc_pages(GFP_KERNEL,
- get_order(size));
- ptr = (void *)page_address(page);
- if ((unsigned long)ptr & 0x07) {
- __free_pages(page, get_order(size));
- ptr = NULL;
- /* Increment the size required if the first
- * attempt failed.
- */
- if (!retries)
- size += 7;
- }
- retries++;
- } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr);
-
- *alloc_size = size;
- *orig_ptr = (unsigned long)ptr;
- if ((unsigned long)ptr & 0x07)
- ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL));
- return ptr;
-}
-
-#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
- free_pages(orig_ptr, get_order(size))
-
static inline int
sleep_cond(wait_queue_head_t *wait_queue, int *condition)
{
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 6bb89419006e..eef2a1e8a7e3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -62,6 +62,9 @@ struct lio {
/** Array of gather component linked lists */
struct list_head *glist;
+ void **glists_virt_base;
+ dma_addr_t *glists_dma_base;
+ u32 glist_entry_size;
/** Pointer to the NIC properties for the Octeon device this network
* interface is associated with.
@@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer)
#define lio_dma_free(oct, size, virt_addr, dma_addr) \
dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
+static inline void *
+lio_alloc_info_buffer(struct octeon_device *oct,
+ struct octeon_droq *droq)
+{
+ void *virt_ptr;
+
+ virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE),
+ &droq->info_list_dma);
+ if (virt_ptr) {
+ droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE;
+ droq->info_base_addr = virt_ptr;
+ }
+
+ return virt_ptr;
+}
+
+static inline void lio_free_info_buffer(struct octeon_device *oct,
+ struct octeon_droq *droq)
+{
+ lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr,
+ droq->info_list_dma);
+}
+
static inline
void *get_rbd(struct sk_buff *skb)
{
@@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb)
static inline u64
lio_map_ring_info(struct octeon_droq *droq, u32 i)
{
- dma_addr_t dma_addr;
- struct octeon_device *oct = droq->oct_dev;
-
- dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
- OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
-
- WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
-
- return (u64)dma_addr;
-}
-
-static inline void
-lio_unmap_ring_info(struct pci_dev *pci_dev,
- u64 info_ptr, u32 size)
-{
- dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE);
+ return droq->info_list_dma + (i * sizeof(struct octeon_droq_info));
}
static inline u64
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index e739c7153562..2269ff562d95 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -269,6 +269,7 @@ struct nicvf {
#define MAX_QUEUES_PER_QSET 8
struct queue_set *qs;
struct nicvf_cq_poll *napi[8];
+ void *iommu_domain;
u8 vf_id;
u8 sqs_id;
bool sqs_mode;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 6feaa24bcfd4..24017588f531 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -16,6 +16,7 @@
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>
+#include <linux/iommu.h>
#include "nic_reg.h"
#include "nic.h"
@@ -525,7 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
/* Get actual TSO descriptors and free them */
tso_sqe =
(struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+ nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+ tso_sqe->subdesc_cnt);
nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+ } else {
+ nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+ hdr->subdesc_cnt);
}
nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
prefetch(skb);
@@ -576,6 +582,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
{
struct sk_buff *skb;
struct nicvf *nic = netdev_priv(netdev);
+ struct nicvf *snic = nic;
int err = 0;
int rq_idx;
@@ -592,7 +599,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
if (err && !cqe_rx->rb_cnt)
return;
- skb = nicvf_get_rcv_skb(nic, cqe_rx);
+ skb = nicvf_get_rcv_skb(snic, cqe_rx);
if (!skb) {
netdev_dbg(nic->netdev, "Packet not received\n");
return;
@@ -1643,6 +1650,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!pass1_silicon(nic->pdev))
nic->hw_tso = true;
+ /* Get iommu domain for iova to physical addr conversion */
+ nic->iommu_domain = iommu_get_domain_for_dev(dev);
+
pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
if (sdevid == 0xA134)
nic->t88 = true;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index ac0390be3b12..f13289f0d238 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -10,6 +10,7 @@
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/etherdevice.h>
+#include <linux/iommu.h>
#include <net/ip.h>
#include <net/tso.h>
@@ -18,6 +19,16 @@
#include "q_struct.h"
#include "nicvf_queues.h"
+#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0)
+
+static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
+{
+ /* Translate only when an IOMMU domain exists; else return dma_addr */
+ if (nic->iommu_domain)
+ return iommu_iova_to_phys(nic->iommu_domain, dma_addr);
+ return dma_addr;
+}
+
static void nicvf_get_page(struct nicvf *nic)
{
if (!nic->rb_pageref || !nic->rb_page)
@@ -87,7 +98,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
u32 buf_len, u64 **rbuf)
{
- int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0;
+ int order = NICVF_PAGE_ORDER;
/* Check if request can be accomodated in previous allocated page */
if (nic->rb_page &&
@@ -97,22 +108,27 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
}
nicvf_get_page(nic);
- nic->rb_page = NULL;
/* Allocate a new page */
+ nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+ order);
if (!nic->rb_page) {
- nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
- order);
- if (!nic->rb_page) {
- this_cpu_inc(nic->pnicvf->drv_stats->
- rcv_buffer_alloc_failures);
- return -ENOMEM;
- }
- nic->rb_page_offset = 0;
+ this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
+ return -ENOMEM;
}
-
+ nic->rb_page_offset = 0;
ret:
- *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+ /* HW will ensure data coherency, CPU sync not required */
+ *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+ nic->rb_page_offset, buf_len,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC));
+ if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
+ if (!nic->rb_page_offset)
+ __free_pages(nic->rb_page, order);
+ nic->rb_page = NULL;
+ return -ENOMEM;
+ }
nic->rb_page_offset += buf_len;
return 0;
@@ -158,16 +174,21 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
rbdr->dma_size = buf_size;
rbdr->enable = true;
rbdr->thresh = RBDR_THRESH;
+ rbdr->head = 0;
+ rbdr->tail = 0;
nic->rb_page = NULL;
for (idx = 0; idx < ring_len; idx++) {
err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
&rbuf);
- if (err)
+ if (err) {
+ /* Set tail so the already allocated and mapped buffers get freed */
+ rbdr->tail = idx - 1;
return err;
+ }
desc = GET_RBDR_DESC(rbdr, idx);
- desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+ desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
}
nicvf_get_page(nic);
@@ -179,7 +200,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
int head, tail;
- u64 buf_addr;
+ u64 buf_addr, phys_addr;
struct rbdr_entry_t *desc;
if (!rbdr)
@@ -192,18 +213,26 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
head = rbdr->head;
tail = rbdr->tail;
- /* Free SKBs */
+ /* Release page references */
while (head != tail) {
desc = GET_RBDR_DESC(rbdr, head);
- buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
- put_page(virt_to_page(phys_to_virt(buf_addr)));
+ buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+ phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+ dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (phys_addr)
+ put_page(virt_to_page(phys_to_virt(phys_addr)));
head++;
head &= (rbdr->dmem.q_len - 1);
}
- /* Free SKB of tail desc */
+ /* Release buffer of tail desc */
desc = GET_RBDR_DESC(rbdr, tail);
- buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
- put_page(virt_to_page(phys_to_virt(buf_addr)));
+ buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+ phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+ dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (phys_addr)
+ put_page(virt_to_page(phys_to_virt(phys_addr)));
/* Free RBDR ring */
nicvf_free_q_desc_mem(nic, &rbdr->dmem);
@@ -250,7 +279,7 @@ refill:
break;
desc = GET_RBDR_DESC(rbdr, tail);
- desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+ desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
refill_rb_cnt--;
new_rb++;
}
@@ -361,9 +390,29 @@ static int nicvf_init_snd_queue(struct nicvf *nic,
return 0;
}
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+ int hdr_sqe, u8 subdesc_cnt)
+{
+ u8 idx;
+ struct sq_gather_subdesc *gather;
+
+ /* Unmap the subdesc_cnt gather subdescs that follow hdr_sqe in the SQ */
+ for (idx = 0; idx < subdesc_cnt; idx++) {
+ hdr_sqe++;
+ hdr_sqe &= (sq->dmem.q_len - 1);
+ gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
+ /* Skip CPU sync on unmap: HW will ensure data coherency */
+ dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
+ gather->size, DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+}
+
static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
{
struct sk_buff *skb;
+ struct sq_hdr_subdesc *hdr;
+ struct sq_hdr_subdesc *tso_sqe;
if (!sq)
return;
@@ -379,8 +428,22 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
smp_rmb();
while (sq->head != sq->tail) {
skb = (struct sk_buff *)sq->skbuff[sq->head];
- if (skb)
- dev_kfree_skb_any(skb);
+ if (!skb)
+ goto next;
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+ /* Check for dummy descriptor used for HW TSO offload on 88xx */
+ if (hdr->dont_send) {
+ /* Get actual TSO descriptors and unmap them */
+ tso_sqe =
+ (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+ nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+ tso_sqe->subdesc_cnt);
+ } else {
+ nicvf_unmap_sndq_buffers(nic, sq, sq->head,
+ hdr->subdesc_cnt);
+ }
+ dev_kfree_skb_any(skb);
+next:
sq->head++;
sq->head &= (sq->dmem.q_len - 1);
}
@@ -559,9 +622,11 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
nicvf_send_msg_to_pf(nic, &mbx);
if (!nic->sqs_mode && (qidx == 0)) {
- /* Enable checking L3/L4 length and TCP/UDP checksums */
+ /* Enable checking L3/L4 length and TCP/UDP checksums
+ * Also allow IPv6 pkts with zero UDP checksum.
+ */
nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
- (BIT(24) | BIT(23) | BIT(21)));
+ (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
nicvf_config_vlan_stripping(nic, nic->netdev->features);
}
@@ -882,6 +947,14 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
return qentry;
}
+/* Rollback to previous tail pointer when descriptors not used */
+static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
+ int qentry, int desc_cnt)
+{
+ sq->tail = qentry;
+ atomic_add(desc_cnt, &sq->free_cnt);
+}
+
/* Free descriptor back to SQ for future use */
void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
{
@@ -1207,8 +1280,9 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
struct sk_buff *skb, u8 sq_num)
{
int i, size;
- int subdesc_cnt, tso_sqe = 0;
+ int subdesc_cnt, hdr_sqe = 0;
int qentry;
+ u64 dma_addr;
subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
if (subdesc_cnt > atomic_read(&sq->free_cnt))
@@ -1223,12 +1297,21 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
/* Add SQ header subdesc */
nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
skb, skb->len);
- tso_sqe = qentry;
+ hdr_sqe = qentry;
/* Add SQ gather subdescs */
qentry = nicvf_get_nxt_sqentry(sq, qentry);
size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
- nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data));
+ /* HW will ensure data coherency, CPU sync not required */
+ dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
+ offset_in_page(skb->data), size,
+ DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+ nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+ return 0;
+ }
+
+ nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
/* Check for scattered buffer */
if (!skb_is_nonlinear(skb))
@@ -1241,15 +1324,26 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
qentry = nicvf_get_nxt_sqentry(sq, qentry);
size = skb_frag_size(frag);
- nicvf_sq_add_gather_subdesc(sq, qentry, size,
- virt_to_phys(
- skb_frag_address(frag)));
+ dma_addr = dma_map_page_attrs(&nic->pdev->dev,
+ skb_frag_page(frag),
+ frag->page_offset, size,
+ DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+ /* Free entire chain of mapped buffers
+ * here 'i' = frags mapped + above mapped skb->data
+ */
+ nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
+ nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+ return 0;
+ }
+ nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
}
doorbell:
if (nic->t88 && skb_shinfo(skb)->gso_size) {
qentry = nicvf_get_nxt_sqentry(sq, qentry);
- nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+ nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
}
nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
@@ -1282,6 +1376,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
int offset;
u16 *rb_lens = NULL;
u64 *rb_ptrs = NULL;
+ u64 phys_addr;
rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
@@ -1296,15 +1391,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
else
rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
- netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
- __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
-
for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
payload_len = rb_lens[frag_num(frag)];
+ phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
+ if (!phys_addr) {
+ if (skb)
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
+
if (!frag) {
/* First fragment */
+ dma_unmap_page_attrs(&nic->pdev->dev,
+ *rb_ptrs - cqe_rx->align_pad,
+ RCV_FRAG_LEN, DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
skb = nicvf_rb_ptr_to_skb(nic,
- *rb_ptrs - cqe_rx->align_pad,
+ phys_addr - cqe_rx->align_pad,
payload_len);
if (!skb)
return NULL;
@@ -1312,8 +1415,11 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
skb_put(skb, payload_len);
} else {
/* Add fragments */
- page = virt_to_page(phys_to_virt(*rb_ptrs));
- offset = phys_to_virt(*rb_ptrs) - page_address(page);
+ dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs,
+ RCV_FRAG_LEN, DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ page = virt_to_page(phys_to_virt(phys_addr));
+ offset = phys_to_virt(phys_addr) - page_address(page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
offset, payload_len, RCV_FRAG_LEN);
}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 5cb84da99a2d..10cb4b84625b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -87,7 +87,7 @@
#define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13))
#define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13))
#define RBDR_THRESH (RCV_BUF_COUNT / 2)
-#define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */
+#define DMA_BUFFER_LEN 1536 /* In multiples of 128bytes */
#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -301,6 +301,8 @@ struct queue_set {
#define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+ int hdr_sqe, u8 subdesc_cnt);
void nicvf_config_vlan_stripping(struct nicvf *nic,
netdev_features_t features);
int nicvf_set_qset_resources(struct nicvf *nic);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 4c8e8cf730bb..64a1095e4d14 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -123,14 +123,44 @@ static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero)
return 1;
}
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+ u16 sdevid;
+
+ if (max_bgx_per_node)
+ return;
+
+ pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+ switch (sdevid) {
+ case PCI_SUBSYS_DEVID_81XX_BGX:
+ max_bgx_per_node = MAX_BGX_PER_CN81XX;
+ break;
+ case PCI_SUBSYS_DEVID_83XX_BGX:
+ max_bgx_per_node = MAX_BGX_PER_CN83XX;
+ break;
+ case PCI_SUBSYS_DEVID_88XX_BGX:
+ default:
+ max_bgx_per_node = MAX_BGX_PER_CN88XX;
+ break;
+ }
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+ int idx = (node * max_bgx_per_node) + bgx_idx;
+
+ return bgx_vnic[idx];
+}
+
/* Return number of BGX present in HW */
unsigned bgx_get_map(int node)
{
int i;
unsigned map = 0;
- for (i = 0; i < MAX_BGX_PER_NODE; i++) {
- if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
+ for (i = 0; i < max_bgx_per_node; i++) {
+ if (bgx_vnic[(node * max_bgx_per_node) + i])
map |= (1 << i);
}
@@ -143,7 +173,7 @@ int bgx_get_lmac_count(int node, int bgx_idx)
{
struct bgx *bgx;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (bgx)
return bgx->lmac_count;
@@ -158,7 +188,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
struct bgx *bgx;
struct lmac *lmac;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return;
@@ -172,7 +202,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state);
const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
{
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
if (bgx)
return bgx->lmac[lmacid].mac;
@@ -183,7 +213,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac);
void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
{
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
if (!bgx)
return;
@@ -194,7 +224,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac);
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
{
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
struct lmac *lmac;
u64 cfg;
@@ -217,7 +247,7 @@ EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
{
struct pfc *pfc = (struct pfc *)pause;
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
struct lmac *lmac;
u64 cfg;
@@ -237,7 +267,7 @@ EXPORT_SYMBOL(bgx_lmac_get_pfc);
void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
{
struct pfc *pfc = (struct pfc *)pause;
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
struct lmac *lmac;
u64 cfg;
@@ -369,7 +399,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
{
struct bgx *bgx;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return 0;
@@ -383,7 +413,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
{
struct bgx *bgx;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return 0;
@@ -411,7 +441,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx,
struct lmac *lmac;
u64 cfg;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return;
@@ -1011,12 +1041,6 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
dev_info(dev, "%s: 40G_KR4\n", (char *)str);
break;
case BGX_MODE_QSGMII:
- if ((lmacid == 0) &&
- (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
- return;
- if ((lmacid == 2) &&
- (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
- return;
dev_info(dev, "%s: QSGMII\n", (char *)str);
break;
case BGX_MODE_RGMII:
@@ -1334,11 +1358,13 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_release_regions;
}
+ set_max_bgx_per_node(pdev);
+
pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
bgx->bgx_id = (pci_resource_start(pdev,
PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
- bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+ bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
bgx->max_lmac = MAX_LMAC_PER_BGX;
bgx_vnic[bgx->bgx_id] = bgx;
} else {
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index a60f189429bb..c5080f2cead5 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -22,7 +22,6 @@
#define MAX_BGX_PER_CN88XX 2
#define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */
#define MAX_BGX_PER_CN83XX 4
-#define MAX_BGX_PER_NODE 4
#define MAX_LMAC_PER_BGX 4
#define MAX_BGX_CHANS_PER_LMAC 16
#define MAX_DMAC_PER_LMAC 8
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 275c2e2349ad..c44036d5761a 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2589,8 +2589,6 @@ static int emac_dt_mdio_probe(struct emac_instance *dev)
static int emac_dt_phy_connect(struct emac_instance *dev,
struct device_node *phy_handle)
{
- int res;
-
dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def),
GFP_KERNEL);
if (!dev->phy.def)
@@ -2617,7 +2615,7 @@ static int emac_dt_phy_probe(struct emac_instance *dev)
{
struct device_node *np = dev->ofdev->dev.of_node;
struct device_node *phy_handle;
- int res = 0;
+ int res = 1;
phy_handle = of_parse_phandle(np, "phy-handle", 0);
@@ -2714,13 +2712,24 @@ static int emac_init_phy(struct emac_instance *dev)
if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) {
int res = emac_dt_phy_probe(dev);
- mutex_unlock(&emac_phy_map_lock);
- if (!res)
+ switch (res) {
+ case 1:
+ /* No phy-handle property configured.
+ * Continue with the existing phy probe
+ * and setup code.
+ */
+ break;
+
+ case 0:
+ mutex_unlock(&emac_phy_map_lock);
goto init_phy;
- dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
- res);
- return res;
+ default:
+ mutex_unlock(&emac_phy_map_lock);
+ dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
+ res);
+ return res;
+ }
}
if (dev->phy_address != 0xffffffff)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9198e6bd5160..5f11b4dc95d2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev)
send_map_query(adapter);
for (i = 0; i < rxadd_subcrqs; i++) {
init_rx_pool(adapter, &adapter->rx_pool[i],
- IBMVNIC_BUFFS_PER_POOL, i,
+ adapter->req_rx_add_entries_per_subcrq, i,
be64_to_cpu(size_array[i]), 1);
if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
dev_err(dev, "Couldn't alloc rx pool\n");
@@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev)
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
tx_pool->tx_buff =
- kcalloc(adapter->max_tx_entries_per_subcrq,
+ kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
if (!tx_pool->tx_buff)
goto tx_pool_alloc_failed;
if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
- adapter->max_tx_entries_per_subcrq *
+ adapter->req_tx_entries_per_subcrq *
adapter->req_mtu))
goto tx_ltb_alloc_failed;
tx_pool->free_map =
- kcalloc(adapter->max_tx_entries_per_subcrq,
+ kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(int), GFP_KERNEL);
if (!tx_pool->free_map)
goto tx_fm_alloc_failed;
- for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+ for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
tx_pool->free_map[j] = j;
tx_pool->consumer_index = 0;
@@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_tx_buff *tx_buff = NULL;
+ struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_tx_pool *tx_pool;
unsigned int tx_send_failed = 0;
unsigned int tx_map_failed = 0;
@@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
int ret = 0;
tx_pool = &adapter->tx_pool[queue_num];
+ tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
be32_to_cpu(adapter->login_rsp_buf->
@@ -744,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_pool->consumer_index =
(tx_pool->consumer_index + 1) %
- adapter->max_tx_entries_per_subcrq;
+ adapter->req_tx_entries_per_subcrq;
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
@@ -817,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if (tx_pool->consumer_index == 0)
tx_pool->consumer_index =
- adapter->max_tx_entries_per_subcrq - 1;
+ adapter->req_tx_entries_per_subcrq - 1;
else
tx_pool->consumer_index--;
@@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
ret = NETDEV_TX_BUSY;
goto out;
}
+
+ atomic_inc(&tx_scrq->used);
+
+ if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+ netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netif_stop_subqueue(netdev, queue_num);
+ }
+
tx_packets++;
tx_bytes += skb->len;
txq->trans_start = jiffies;
@@ -1213,6 +1223,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
scrq->adapter = adapter;
scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
scrq->cur = 0;
+ atomic_set(&scrq->used, 0);
scrq->rx_skb_top = NULL;
spin_lock_init(&scrq->lock);
@@ -1355,14 +1366,28 @@ restart_loop:
DMA_TO_DEVICE);
}
- if (txbuff->last_frag)
+ if (txbuff->last_frag) {
+ atomic_dec(&scrq->used);
+
+ if (atomic_read(&scrq->used) <=
+ (adapter->req_tx_entries_per_subcrq / 2) &&
+ netif_subqueue_stopped(adapter->netdev,
+ txbuff->skb)) {
+ netif_wake_subqueue(adapter->netdev,
+ scrq->pool_index);
+ netdev_dbg(adapter->netdev,
+ "Started queue %d\n",
+ scrq->pool_index);
+ }
+
dev_kfree_skb_any(txbuff->skb);
+ }
adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
producer_index] = index;
adapter->tx_pool[pool].producer_index =
(adapter->tx_pool[pool].producer_index + 1) %
- adapter->max_tx_entries_per_subcrq;
+ adapter->req_tx_entries_per_subcrq;
}
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 422824f1f42a..1993b42666f7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue {
spinlock_t lock;
struct sk_buff *rx_skb_top;
struct ibmvnic_adapter *adapter;
+ atomic_t used;
};
struct ibmvnic_long_term_buff {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index ddb4ca4ff930..117170014e88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -14,6 +14,7 @@ config MLX5_CORE
config MLX5_CORE_EN
bool "Mellanox Technologies ConnectX-4 Ethernet support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+ depends on IPV6=y || IPV6=n || MLX5_CORE=m
imply PTP_1588_CLOCK
default n
---help---
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 0523ed47f597..8fa23f6a1f67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -302,6 +302,9 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_dcbx *dcbx = &priv->dcbx;
+ if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+ return 1;
+
if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
return 0;
@@ -315,13 +318,10 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
return 1;
}
- if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+ if (!(mode & DCB_CAP_DCBX_HOST))
return 1;
- if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
- !(mode & DCB_CAP_DCBX_VER_CEE) ||
- !(mode & DCB_CAP_DCBX_VER_IEEE) ||
- !(mode & DCB_CAP_DCBX_HOST))
+ if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
return 1;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 31e3cb7ee5fe..5621dcfda4f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -204,9 +204,6 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
struct iphdr *iph;
/* We are only going to peek, no need to clone the SKB */
- if (skb->protocol != htons(ETH_P_IP))
- goto out;
-
if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
goto out;
@@ -249,7 +246,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
lbtp->loopback_ok = false;
init_completion(&lbtp->comp);
- lbtp->pt.type = htons(ETH_P_ALL);
+ lbtp->pt.type = htons(ETH_P_IP);
lbtp->pt.func = mlx5e_test_loopback_validate;
lbtp->pt.dev = priv->netdev;
lbtp->pt.af_packet_priv = lbtp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 44406a5ec15d..79481f4cf264 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -48,9 +48,14 @@
#include "eswitch.h"
#include "vxlan.h"
+enum {
+ MLX5E_TC_FLOW_ESWITCH = BIT(0),
+};
+
struct mlx5e_tc_flow {
struct rhash_head node;
u64 cookie;
+ u8 flags;
struct mlx5_flow_handle *rule;
struct list_head encap; /* flows sharing the same encap */
struct mlx5_esw_flow_attr *attr;
@@ -177,7 +182,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
mlx5_fc_destroy(priv->mdev, counter);
}
- if (esw && esw->mode == SRIOV_OFFLOADS) {
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
mlx5_eswitch_del_vlan_action(esw, flow->attr);
if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
mlx5e_detach_encap(priv, flow);
@@ -598,6 +603,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
}
static int parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f)
{
@@ -609,7 +615,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
err = __parse_cls_flower(priv, spec, f, &min_inline);
- if (!err && esw->mode == SRIOV_OFFLOADS &&
+ if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
rep->vport != FDB_UPLINK_VPORT) {
if (min_inline > esw->offloads.inline_mode) {
netdev_warn(priv->netdev,
@@ -1132,23 +1138,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
struct tc_cls_flower_offload *f)
{
struct mlx5e_tc_table *tc = &priv->fs.tc;
- int err = 0;
- bool fdb_flow = false;
+ int err, attr_size = 0;
u32 flow_tag, action;
struct mlx5e_tc_flow *flow;
struct mlx5_flow_spec *spec;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u8 flow_flags = 0;
- if (esw && esw->mode == SRIOV_OFFLOADS)
- fdb_flow = true;
-
- if (fdb_flow)
- flow = kzalloc(sizeof(*flow) +
- sizeof(struct mlx5_esw_flow_attr),
- GFP_KERNEL);
- else
- flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ if (esw && esw->mode == SRIOV_OFFLOADS) {
+ flow_flags = MLX5E_TC_FLOW_ESWITCH;
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ }
+ flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec || !flow) {
err = -ENOMEM;
@@ -1156,12 +1158,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
}
flow->cookie = f->cookie;
+ flow->flags = flow_flags;
- err = parse_cls_flower(priv, spec, f);
+ err = parse_cls_flower(priv, flow, spec, f);
if (err < 0)
goto err_free;
- if (fdb_flow) {
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
err = parse_tc_fdb_actions(priv, f->exts, flow);
if (err < 0)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2478516a61e2..ded27bb9a3b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1136,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
u32 *match_criteria)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct list_head *prev = ft->node.children.prev;
+ struct list_head *prev = &ft->node.children;
unsigned int candidate_index = 0;
struct mlx5_flow_group *fg;
void *match_criteria_addr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c4242a4e8130..e2bd600d19de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1352,6 +1352,7 @@ static int init_one(struct pci_dev *pdev,
if (err)
goto clean_load;
+ pci_save_state(pdev);
return 0;
clean_load:
@@ -1407,9 +1408,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
mlx5_enter_error_state(dev);
mlx5_unload_one(dev, priv, false);
- /* In case of kernel call save the pci state and drain the health wq */
+ /* In case of kernel call drain the health wq */
if (state) {
- pci_save_state(pdev);
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
}
@@ -1461,6 +1461,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
pci_set_master(pdev);
pci_restore_state(pdev);
+ pci_save_state(pdev);
if (wait_vital(pdev)) {
dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0899e2d310e2..d9616daf8a70 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -769,7 +769,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
#define MLXSW_REG_SPVM_ID 0x200F
#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \
MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
@@ -1702,7 +1702,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload,
#define MLXSW_REG_SPVMLR_ID 0x2020
#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
MLXSW_REG_SPVMLR_REC_LEN * \
MLXSW_REG_SPVMLR_REC_MAX_COUNT)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 22ab42925377..ae6cccc666e4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -303,11 +303,11 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
ingress,
MLXSW_SP_ACL_PROFILE_FLOWER);
- if (WARN_ON(IS_ERR(ruleset)))
+ if (IS_ERR(ruleset))
return;
rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
- if (!WARN_ON(!rule)) {
+ if (rule) {
mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index d42d03df751a..7e3a6fed3da6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -422,8 +422,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn,
u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+ u32 align = elems_per_page * DQ_RANGE_ALIGN;
- p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+ p_conn->cid_count = roundup(p_conn->cid_count, align);
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index e2a081ceaf52..e518f914eab1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2389,9 +2389,8 @@ qed_chain_alloc_sanity_check(struct qed_dev *cdev,
* size/capacity fields are of a u32 type.
*/
if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
- chain_size > 0x10000) ||
- (cnt_type == QED_CHAIN_CNT_TYPE_U32 &&
- chain_size > 0x100000000ULL)) {
+ chain_size > ((u32)U16_MAX + 1)) ||
+ (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
DP_NOTICE(cdev,
"The actual chain size (0x%llx) is larger than the maximal possible value\n",
chain_size);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 3a44d6b395fa..098766f7fe88 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -190,6 +190,9 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+ p_init->ooo_enable = p_params->ooo_enable;
+ p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] +
+ p_params->ll2_ooo_queue_id;
p_init->func_params.log_page_size = p_params->log_page_size;
val = p_params->num_tasks;
p_init->func_params.num_tasks = cpu_to_le16(val);
@@ -786,6 +789,23 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
}
+void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+ struct qed_iscsi_conn *p_conn)
+{
+ qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+ qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+ qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct tcp_upload_params),
+ p_conn->tcp_upload_params_virt_addr,
+ p_conn->tcp_upload_params_phys_addr);
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct scsi_terminate_extra_params),
+ p_conn->queue_cnts_virt_addr,
+ p_conn->queue_cnts_phys_addr);
+ kfree(p_conn);
+}
+
struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
{
struct qed_iscsi_info *p_iscsi_info;
@@ -807,6 +827,17 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
void qed_iscsi_free(struct qed_hwfn *p_hwfn,
struct qed_iscsi_info *p_iscsi_info)
{
+ struct qed_iscsi_conn *p_conn = NULL;
+
+ while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) {
+ p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+ struct qed_iscsi_conn, list_entry);
+ if (p_conn) {
+ list_del(&p_conn->list_entry);
+ qed_iscsi_free_connection(p_hwfn, p_conn);
+ }
+ }
+
kfree(p_iscsi_info);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 9a0b9af10a57..0d3cef409c96 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -211,6 +211,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
/* If need to reuse or there's no replacement buffer, repost this */
if (rc)
goto out_post;
+ dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+ cdev->ll2->rx_size, DMA_FROM_DEVICE);
skb = build_skb(buffer->data, 0);
if (!skb) {
@@ -474,7 +476,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_conn,
union core_rx_cqe_union *p_cqe,
- unsigned long lock_flags,
+ unsigned long *p_lock_flags,
bool b_last_cqe)
{
struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
@@ -495,10 +497,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
"Mismatch between active_descq and the LL2 Rx chain\n");
list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
- spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+ spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
- spin_lock_irqsave(&p_rx->lock, lock_flags);
+ spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
return 0;
}
@@ -538,7 +540,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
break;
case CORE_RX_CQE_TYPE_REGULAR:
rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
- cqe, flags, b_last_cqe);
+ cqe, &flags,
+ b_last_cqe);
break;
default:
rc = -EIO;
@@ -968,7 +971,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
{
struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
- struct qed_ll2_conn ll2_info;
+ struct qed_ll2_conn ll2_info = { 0 };
int rc;
ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 7d731c6cb892..378afce58b3f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -159,6 +159,8 @@ struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn)
if (!p_ooo_info->ooo_history.p_cqes)
goto no_history_mem;
+ p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
return p_ooo_info;
no_history_mem:
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 65077c77082a..91e9bd7159ab 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1535,32 +1535,33 @@ static int smc_close(struct net_device *dev)
* Ethtool support
*/
static int
-smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct smc_local *lp = netdev_priv(dev);
int ret;
- cmd->maxtxpkt = 1;
- cmd->maxrxpkt = 1;
-
if (lp->phy_type != 0) {
spin_lock_irq(&lp->lock);
- ret = mii_ethtool_gset(&lp->mii, cmd);
+ ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
spin_unlock_irq(&lp->lock);
} else {
- cmd->supported = SUPPORTED_10baseT_Half |
+ u32 supported = SUPPORTED_10baseT_Half |
SUPPORTED_10baseT_Full |
SUPPORTED_TP | SUPPORTED_AUI;
if (lp->ctl_rspeed == 10)
- ethtool_cmd_speed_set(cmd, SPEED_10);
+ cmd->base.speed = SPEED_10;
else if (lp->ctl_rspeed == 100)
- ethtool_cmd_speed_set(cmd, SPEED_100);
+ cmd->base.speed = SPEED_100;
+
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.port = 0;
+ cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ?
+ DUPLEX_FULL : DUPLEX_HALF;
- cmd->autoneg = AUTONEG_DISABLE;
- cmd->transceiver = XCVR_INTERNAL;
- cmd->port = 0;
- cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF;
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.supported, supported);
ret = 0;
}
@@ -1569,24 +1570,26 @@ smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
}
static int
-smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct smc_local *lp = netdev_priv(dev);
int ret;
if (lp->phy_type != 0) {
spin_lock_irq(&lp->lock);
- ret = mii_ethtool_sset(&lp->mii, cmd);
+ ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
spin_unlock_irq(&lp->lock);
} else {
- if (cmd->autoneg != AUTONEG_DISABLE ||
- cmd->speed != SPEED_10 ||
- (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
- (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+ if (cmd->base.autoneg != AUTONEG_DISABLE ||
+ cmd->base.speed != SPEED_10 ||
+ (cmd->base.duplex != DUPLEX_HALF &&
+ cmd->base.duplex != DUPLEX_FULL) ||
+ (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI))
return -EINVAL;
-// lp->port = cmd->port;
- lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+// lp->port = cmd->base.port;
+ lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
// if (netif_running(dev))
// smc_set_port(dev);
@@ -1744,8 +1747,6 @@ static int smc_ethtool_seteeprom(struct net_device *dev,
static const struct ethtool_ops smc_ethtool_ops = {
- .get_settings = smc_ethtool_getsettings,
- .set_settings = smc_ethtool_setsettings,
.get_drvinfo = smc_ethtool_getdrvinfo,
.get_msglevel = smc_ethtool_getmsglevel,
@@ -1755,6 +1756,8 @@ static const struct ethtool_ops smc_ethtool_ops = {
.get_eeprom_len = smc_ethtool_geteeprom_len,
.get_eeprom = smc_ethtool_geteeprom,
.set_eeprom = smc_ethtool_seteeprom,
+ .get_link_ksettings = smc_ethtool_get_link_ksettings,
+ .set_link_ksettings = smc_ethtool_set_link_ksettings,
};
static const struct net_device_ops smc_netdev_ops = {
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d3e73ac158ae..f9f3dba7a588 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -700,6 +700,8 @@ struct net_device_context {
u32 tx_checksum_mask;
+ u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+
/* Ethtool settings */
u8 duplex;
u32 speed;
@@ -757,7 +759,6 @@ struct netvsc_device {
struct nvsp_message revoke_packet;
- u32 send_table[VRSS_SEND_TAB_SIZE];
u32 max_chn;
u32 num_chn;
spinlock_t sc_lock; /* Protects num_sc_offered variable */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d35ebd993b38..4c1d8cca247b 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1136,15 +1136,11 @@ static void netvsc_receive(struct net_device *ndev,
static void netvsc_send_table(struct hv_device *hdev,
struct nvsp_message *nvmsg)
{
- struct netvsc_device *nvscdev;
struct net_device *ndev = hv_get_drvdata(hdev);
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
int i;
u32 count, *tab;
- nvscdev = get_outbound_net_device(hdev);
- if (!nvscdev)
- return;
-
count = nvmsg->msg.v5_msg.send_table.count;
if (count != VRSS_SEND_TAB_SIZE) {
netdev_err(ndev, "Received wrong send-table size:%u\n", count);
@@ -1155,7 +1151,7 @@ static void netvsc_send_table(struct hv_device *hdev,
nvmsg->msg.v5_msg.send_table.offset);
for (i = 0; i < count; i++)
- nvscdev->send_table[i] = tab[i];
+ net_device_ctx->tx_send_table[i] = tab[i];
}
static void netvsc_send_vf(struct net_device_context *net_device_ctx,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index bc05c895d958..5ede87f30463 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+ unsigned int num_tx_queues = ndev->real_num_tx_queues;
struct sock *sk = skb->sk;
int q_idx = sk_tx_queue_get(sk);
- if (q_idx < 0 || skb->ooo_okay ||
- q_idx >= ndev->real_num_tx_queues) {
+ if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
int new_idx;
- new_idx = nvsc_dev->send_table[hash]
- % nvsc_dev->num_chn;
+ new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
if (q_idx != new_idx && sk &&
sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
@@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
q_idx = new_idx;
}
- if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
- q_idx = 0;
-
return q_idx;
}
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index f9d0fa315a47..272b051a0199 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1883,17 +1883,6 @@ static int m88e1510_probe(struct phy_device *phydev)
return m88e1510_hwmon_probe(phydev);
}
-static void marvell_remove(struct phy_device *phydev)
-{
-#ifdef CONFIG_HWMON
-
- struct marvell_priv *priv = phydev->priv;
-
- if (priv && priv->hwmon_dev)
- hwmon_device_unregister(priv->hwmon_dev);
-#endif
-}
-
static struct phy_driver marvell_drivers[] = {
{
.phy_id = MARVELL_PHY_ID_88E1101,
@@ -1974,7 +1963,6 @@ static struct phy_driver marvell_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = &m88e1121_probe,
- .remove = &marvell_remove,
.config_init = &m88e1121_config_init,
.config_aneg = &m88e1121_config_aneg,
.read_status = &marvell_read_status,
@@ -2087,7 +2075,6 @@ static struct phy_driver marvell_drivers[] = {
.features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
.flags = PHY_HAS_INTERRUPT,
.probe = &m88e1510_probe,
- .remove = &marvell_remove,
.config_init = &m88e1510_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
@@ -2109,7 +2096,6 @@ static struct phy_driver marvell_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = m88e1510_probe,
- .remove = &marvell_remove,
.config_init = &marvell_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
@@ -2127,7 +2113,6 @@ static struct phy_driver marvell_drivers[] = {
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1545",
.probe = m88e1510_probe,
- .remove = &marvell_remove,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &marvell_config_init,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index daec6555f3b1..5198ccfa347f 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic PHY",
- .soft_reset = genphy_soft_reset,
+ .soft_reset = genphy_no_soft_reset,
.config_init = genphy_config_init,
.features = PHY_GBIT_FEATURES | SUPPORTED_MII |
SUPPORTED_AUI | SUPPORTED_FIBRE |
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 93ffedfa2994..1e2d4f1179da 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -491,13 +491,14 @@ static int ks8995_probe(struct spi_device *spi)
if (err)
return err;
- ks->regs_attr.size = ks->chip->regs_size;
memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+ ks->regs_attr.size = ks->chip->regs_size;
err = ks8995_reset(ks);
if (err)
return err;
+ sysfs_attr_init(&ks->regs_attr.attr);
err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
if (err) {
dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 4a24b5d15f5a..1b52520715ae 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2072,6 +2072,7 @@ static int team_dev_type_check_change(struct net_device *dev,
static void team_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = ETH_MAX_MTU;
dev->netdev_ops = &team_netdev_ops;
dev->ethtool_ops = &team_ethtool_ops;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index dc1b1dd9157c..34cc3c590aa5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -822,7 +822,18 @@ static void tun_net_uninit(struct net_device *dev)
/* Net device open. */
static int tun_net_open(struct net_device *dev)
{
+ struct tun_struct *tun = netdev_priv(dev);
+ int i;
+
netif_tx_start_all_queues(dev);
+
+ for (i = 0; i < tun->numqueues; i++) {
+ struct tun_file *tfile;
+
+ tfile = rtnl_dereference(tun->tfiles[i]);
+ tfile->socket.sk->sk_write_space(tfile->socket.sk);
+ }
+
return 0;
}
@@ -1103,9 +1114,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
if (!skb_array_empty(&tfile->tx_array))
mask |= POLLIN | POLLRDNORM;
- if (sock_writeable(sk) ||
- (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
- sock_writeable(sk)))
+ if (tun->dev->flags & IFF_UP &&
+ (sock_writeable(sk) ||
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+ sock_writeable(sk))))
mask |= POLLOUT | POLLWRNORM;
if (tun->dev->reg_state != NETREG_REGISTERED)
@@ -2570,7 +2582,6 @@ static int __init tun_init(void)
int ret = 0;
pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
- pr_info("%s\n", DRV_COPYRIGHT);
ret = rtnl_link_register(&tun_link_ops);
if (ret) {
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 22379da63400..fea687f35b5a 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -340,6 +340,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ int len = skb->len;
netdev_tx_t ret = is_ip_tx_frame(skb, dev);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
@@ -347,7 +348,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
u64_stats_update_begin(&dstats->syncp);
dstats->tx_pkts++;
- dstats->tx_bytes += skb->len;
+ dstats->tx_bytes += len;
u64_stats_update_end(&dstats->syncp);
} else {
this_cpu_inc(dev->dstats->tx_drps);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e375560cc74e..bdb6ae16d4a8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2976,6 +2976,44 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
return 0;
}
+static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+ struct vxlan_config *conf)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
+
+ err = vxlan_dev_configure(net, dev, conf, false);
+ if (err)
+ return err;
+
+ dev->ethtool_ops = &vxlan_ethtool_ops;
+
+ /* create an fdb entry for a valid default destination */
+ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+ err = vxlan_fdb_create(vxlan, all_zeros_mac,
+ &vxlan->default_dst.remote_ip,
+ NUD_REACHABLE | NUD_PERMANENT,
+ NLM_F_EXCL | NLM_F_CREATE,
+ vxlan->cfg.dst_port,
+ vxlan->default_dst.remote_vni,
+ vxlan->default_dst.remote_vni,
+ vxlan->default_dst.remote_ifindex,
+ NTF_SELF);
+ if (err)
+ return err;
+ }
+
+ err = register_netdevice(dev);
+ if (err) {
+ vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
+ return err;
+ }
+
+ list_add(&vxlan->next, &vn->vxlan_list);
+ return 0;
+}
+
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
struct net_device *dev, struct vxlan_config *conf,
bool changelink)
@@ -3172,8 +3210,6 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
static int vxlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
- struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_config conf;
int err;
@@ -3181,36 +3217,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (err)
return err;
- err = vxlan_dev_configure(src_net, dev, &conf, false);
- if (err)
- return err;
-
- dev->ethtool_ops = &vxlan_ethtool_ops;
-
- /* create an fdb entry for a valid default destination */
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
- err = vxlan_fdb_create(vxlan, all_zeros_mac,
- &vxlan->default_dst.remote_ip,
- NUD_REACHABLE | NUD_PERMANENT,
- NLM_F_EXCL | NLM_F_CREATE,
- vxlan->cfg.dst_port,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_ifindex,
- NTF_SELF);
- if (err)
- return err;
- }
-
- err = register_netdevice(dev);
- if (err) {
- vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
- return err;
- }
-
- list_add(&vxlan->next, &vn->vxlan_list);
-
- return 0;
+ return __vxlan_dev_create(src_net, dev, &conf);
}
static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -3440,7 +3447,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name,
if (IS_ERR(dev))
return dev;
- err = vxlan_dev_configure(net, dev, conf, false);
+ err = __vxlan_dev_create(net, dev, conf);
if (err < 0) {
free_netdev(dev);
return ERR_PTR(err);
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index a5045b5279d7..6742ae605660 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -381,8 +381,8 @@ static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
/* set bd status and length */
bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S;
- iowrite16be(bd_status, &bd->status);
iowrite16be(skb->len, &bd->length);
+ iowrite16be(bd_status, &bd->status);
/* Move to next BD in the ring */
if (!(bd_status & T_W_S))
@@ -457,7 +457,7 @@ static int hdlc_rx_done(struct ucc_hdlc_private *priv, int rx_work_limit)
struct sk_buff *skb;
hdlc_device *hdlc = dev_to_hdlc(dev);
struct qe_bd *bd;
- u32 bd_status;
+ u16 bd_status;
u16 length, howmany = 0;
u8 *bdbuffer;
int i;
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index e7f5910a6519..f8eb66ef2944 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface,
struct i2400mu *i2400mu;
struct usb_device *usb_dev = interface_to_usbdev(iface);
+ if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+ return -ENODEV;
+
if (usb_dev->speed != USB_SPEED_HIGH)
dev_err(dev, "device not connected as high speed\n");
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 829b26cd4549..8397f6c92451 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = vif->num_queues;
+ unsigned int num_queues;
u16 index;
struct xenvif_rx_cb *cb;
BUG_ON(skb->dev != dev);
- /* Drop the packet if queues are not set up */
+ /* Drop the packet if queues are not set up.
+ * This handler should be called inside an RCU read section
+ * so we don't need to enter it here explicitly.
+ */
+ num_queues = READ_ONCE(vif->num_queues);
if (num_queues < 1)
goto drop;
@@ -222,18 +226,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
+ unsigned int num_queues;
u64 rx_bytes = 0;
u64 rx_packets = 0;
u64 tx_bytes = 0;
u64 tx_packets = 0;
unsigned int index;
- spin_lock(&vif->lock);
- if (vif->queues == NULL)
- goto out;
+ rcu_read_lock();
+ num_queues = READ_ONCE(vif->num_queues);
/* Aggregate tx and rx stats from each queue */
- for (index = 0; index < vif->num_queues; ++index) {
+ for (index = 0; index < num_queues; ++index) {
queue = &vif->queues[index];
rx_bytes += queue->stats.rx_bytes;
rx_packets += queue->stats.rx_packets;
@@ -241,8 +245,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
tx_packets += queue->stats.tx_packets;
}
-out:
- spin_unlock(&vif->lock);
+ rcu_read_unlock();
vif->dev->stats.rx_bytes = rx_bytes;
vif->dev->stats.rx_packets = rx_packets;
@@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 * data)
{
struct xenvif *vif = netdev_priv(dev);
- unsigned int num_queues = vif->num_queues;
+ unsigned int num_queues;
int i;
unsigned int queue_index;
+ rcu_read_lock();
+ num_queues = READ_ONCE(vif->num_queues);
+
for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
unsigned long accum = 0;
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
}
data[i] = accum;
}
+
+ rcu_read_unlock();
}
static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f9bcf4a665bc..602d408fa25e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif)
netdev_err(vif->dev, "fatal error; disabling device\n");
vif->disabled = true;
/* Disable the vif from queue 0's kthread */
- if (vif->queues)
+ if (vif->num_queues)
xenvif_kick_thread(&vif->queues[0]);
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d2d7cd9145b1..a56d3eab35dd 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be)
struct xenvif *vif = be->vif;
if (vif) {
+ unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- struct xenvif_queue *queues;
xen_unregister_watchers(vif);
#ifdef CONFIG_DEBUG_FS
xenvif_debugfs_delif(vif);
#endif /* CONFIG_DEBUG_FS */
xenvif_disconnect_data(vif);
- for (queue_index = 0;
- queue_index < vif->num_queues;
- ++queue_index)
- xenvif_deinit_queue(&vif->queues[queue_index]);
- spin_lock(&vif->lock);
- queues = vif->queues;
+ /* At this point some of the handlers may still be active
+ * so we need to have additional synchronization here.
+ */
vif->num_queues = 0;
- vif->queues = NULL;
- spin_unlock(&vif->lock);
+ synchronize_net();
- vfree(queues);
+ for (queue_index = 0; queue_index < num_queues; ++queue_index)
+ xenvif_deinit_queue(&vif->queues[queue_index]);
+
+ vfree(vif->queues);
+ vif->queues = NULL;
xenvif_disconnect_ctrl(vif);
}
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 5be4783e40d4..dea98ffb6f60 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -103,15 +103,6 @@ static struct quirk_entry quirk_asus_x200ca = {
.wapf = 2,
};
-static struct quirk_entry quirk_no_rfkill = {
- .no_rfkill = true,
-};
-
-static struct quirk_entry quirk_no_rfkill_wapf4 = {
- .wapf = 4,
- .no_rfkill = true,
-};
-
static struct quirk_entry quirk_asus_ux303ub = {
.wmi_backlight_native = true,
};
@@ -194,7 +185,7 @@ static const struct dmi_system_id asus_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"),
},
- .driver_data = &quirk_no_rfkill_wapf4,
+ .driver_data = &quirk_asus_wapf4,
},
{
.callback = dmi_matched,
@@ -203,7 +194,7 @@ static const struct dmi_system_id asus_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"),
},
- .driver_data = &quirk_no_rfkill_wapf4,
+ .driver_data = &quirk_asus_wapf4,
},
{
.callback = dmi_matched,
@@ -369,42 +360,6 @@ static const struct dmi_system_id asus_quirks[] = {
},
{
.callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. X555UB",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. N552VW",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. U303LB",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. Z550MA",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
.ident = "ASUSTeK COMPUTER INC. UX303UB",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 43cb680adbb4..8fe5890bf539 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -159,6 +159,8 @@ MODULE_LICENSE("GPL");
#define USB_INTEL_XUSB2PR 0xD0
#define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31
+static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
+
struct bios_args {
u32 arg0;
u32 arg1;
@@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
return 0;
}
+static bool ashs_present(void)
+{
+ int i = 0;
+ while (ashs_ids[i]) {
+ if (acpi_dev_found(ashs_ids[i++]))
+ return true;
+ }
+ return false;
+}
+
/*
* WMI Driver
*/
@@ -2095,7 +2107,11 @@ static int asus_wmi_add(struct platform_device *pdev)
if (err)
goto fail_leds;
- if (!asus->driver->quirks->no_rfkill) {
+ asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
+ if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+ asus->driver->wlan_ctrl_by_user = 1;
+
+ if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
err = asus_wmi_rfkill_init(asus);
if (err)
goto fail_rfkill;
@@ -2134,10 +2150,6 @@ static int asus_wmi_add(struct platform_device *pdev)
if (err)
goto fail_debugfs;
- asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
- if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
- asus->driver->wlan_ctrl_by_user = 1;
-
return 0;
fail_debugfs:
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index fdff626c3b51..c9589d9342bb 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -39,7 +39,6 @@ struct key_entry;
struct asus_wmi;
struct quirk_entry {
- bool no_rfkill;
bool hotplug_wireless;
bool scalar_panel_brightness;
bool store_backlight_power;
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 2b218b1d13e5..e12cc3504d48 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -78,18 +78,18 @@
#define FUJITSU_LCD_N_LEVELS 8
-#define ACPI_FUJITSU_CLASS "fujitsu"
-#define ACPI_FUJITSU_HID "FUJ02B1"
-#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver"
-#define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1"
-#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3"
-#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
-#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
+#define ACPI_FUJITSU_CLASS "fujitsu"
+#define ACPI_FUJITSU_BL_HID "FUJ02B1"
+#define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver"
+#define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1"
+#define ACPI_FUJITSU_LAPTOP_HID "FUJ02E3"
+#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
+#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME "Fujitsu FUJ02E3"
#define ACPI_FUJITSU_NOTIFY_CODE1 0x80
/* FUNC interface - command values */
-#define FUNC_RFKILL 0x1000
+#define FUNC_FLAGS 0x1000
#define FUNC_LEDS 0x1001
#define FUNC_BUTTONS 0x1002
#define FUNC_BACKLIGHT 0x1004
@@ -97,6 +97,11 @@
/* FUNC interface - responses */
#define UNSUPPORTED_CMD 0x80000000
+/* FUNC interface - status flags */
+#define FLAG_RFKILL 0x020
+#define FLAG_LID 0x100
+#define FLAG_DOCK 0x200
+
#if IS_ENABLED(CONFIG_LEDS_CLASS)
/* FUNC interface - LED control */
#define FUNC_LED_OFF 0x1
@@ -136,7 +141,7 @@
#endif
/* Device controlling the backlight and associated keys */
-struct fujitsu_t {
+struct fujitsu_bl {
acpi_handle acpi_handle;
struct acpi_device *dev;
struct input_dev *input;
@@ -150,12 +155,12 @@ struct fujitsu_t {
unsigned int brightness_level;
};
-static struct fujitsu_t *fujitsu;
+static struct fujitsu_bl *fujitsu_bl;
static int use_alt_lcd_levels = -1;
static int disable_brightness_adjust = -1;
-/* Device used to access other hotkeys on the laptop */
-struct fujitsu_hotkey_t {
+/* Device used to access hotkeys and other features on the laptop */
+struct fujitsu_laptop {
acpi_handle acpi_handle;
struct acpi_device *dev;
struct input_dev *input;
@@ -163,17 +168,15 @@ struct fujitsu_hotkey_t {
struct platform_device *pf_device;
struct kfifo fifo;
spinlock_t fifo_lock;
- int rfkill_supported;
- int rfkill_state;
+ int flags_supported;
+ int flags_state;
int logolamp_registered;
int kblamps_registered;
int radio_led_registered;
int eco_led_registered;
};
-static struct fujitsu_hotkey_t *fujitsu_hotkey;
-
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event);
+static struct fujitsu_laptop *fujitsu_laptop;
#if IS_ENABLED(CONFIG_LEDS_CLASS)
static enum led_brightness logolamp_get(struct led_classdev *cdev);
@@ -222,8 +225,6 @@ static struct led_classdev eco_led = {
static u32 dbg_level = 0x03;
#endif
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event);
-
/* Fujitsu ACPI interface function */
static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
@@ -239,7 +240,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
unsigned long long value;
acpi_handle handle = NULL;
- status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle);
+ status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle);
if (ACPI_FAILURE(status)) {
vdbg_printk(FUJLAPTOP_DBG_ERROR,
"FUNC interface is not present\n");
@@ -300,9 +301,9 @@ static int radio_led_set(struct led_classdev *cdev,
enum led_brightness brightness)
{
if (brightness >= LED_FULL)
- return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON);
+ return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON);
else
- return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0);
+ return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0);
}
static int eco_led_set(struct led_classdev *cdev,
@@ -346,7 +347,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev)
{
enum led_brightness brightness = LED_OFF;
- if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON)
+ if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON)
brightness = LED_FULL;
return brightness;
@@ -373,10 +374,10 @@ static int set_lcd_level(int level)
vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n",
level);
- if (level < 0 || level >= fujitsu->max_brightness)
+ if (level < 0 || level >= fujitsu_bl->max_brightness)
return -EINVAL;
- status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+ status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle);
if (ACPI_FAILURE(status)) {
vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
return -ENODEV;
@@ -398,10 +399,10 @@ static int set_lcd_level_alt(int level)
vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n",
level);
- if (level < 0 || level >= fujitsu->max_brightness)
+ if (level < 0 || level >= fujitsu_bl->max_brightness)
return -EINVAL;
- status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
+ status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle);
if (ACPI_FAILURE(status)) {
vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
return -ENODEV;
@@ -421,19 +422,19 @@ static int get_lcd_level(void)
vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n");
- status =
- acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+ status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL,
+ &state);
if (ACPI_FAILURE(status))
return 0;
- fujitsu->brightness_level = state & 0x0fffffff;
+ fujitsu_bl->brightness_level = state & 0x0fffffff;
if (state & 0x80000000)
- fujitsu->brightness_changed = 1;
+ fujitsu_bl->brightness_changed = 1;
else
- fujitsu->brightness_changed = 0;
+ fujitsu_bl->brightness_changed = 0;
- return fujitsu->brightness_level;
+ return fujitsu_bl->brightness_level;
}
static int get_max_brightness(void)
@@ -443,14 +444,14 @@ static int get_max_brightness(void)
vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n");
- status =
- acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state);
+ status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL,
+ &state);
if (ACPI_FAILURE(status))
return -1;
- fujitsu->max_brightness = state;
+ fujitsu_bl->max_brightness = state;
- return fujitsu->max_brightness;
+ return fujitsu_bl->max_brightness;
}
/* Backlight device stuff */
@@ -483,7 +484,7 @@ static int bl_update_status(struct backlight_device *b)
return ret;
}
-static const struct backlight_ops fujitsubl_ops = {
+static const struct backlight_ops fujitsu_bl_ops = {
.get_brightness = bl_get_brightness,
.update_status = bl_update_status,
};
@@ -511,7 +512,7 @@ show_brightness_changed(struct device *dev,
int ret;
- ret = fujitsu->brightness_changed;
+ ret = fujitsu_bl->brightness_changed;
if (ret < 0)
return ret;
@@ -539,7 +540,7 @@ static ssize_t store_lcd_level(struct device *dev,
int level, ret;
if (sscanf(buf, "%i", &level) != 1
- || (level < 0 || level >= fujitsu->max_brightness))
+ || (level < 0 || level >= fujitsu_bl->max_brightness))
return -EINVAL;
if (use_alt_lcd_levels)
@@ -567,9 +568,9 @@ static ssize_t
show_lid_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (!(fujitsu_hotkey->rfkill_supported & 0x100))
+ if (!(fujitsu_laptop->flags_supported & FLAG_LID))
return sprintf(buf, "unknown\n");
- if (fujitsu_hotkey->rfkill_state & 0x100)
+ if (fujitsu_laptop->flags_state & FLAG_LID)
return sprintf(buf, "open\n");
else
return sprintf(buf, "closed\n");
@@ -579,9 +580,9 @@ static ssize_t
show_dock_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (!(fujitsu_hotkey->rfkill_supported & 0x200))
+ if (!(fujitsu_laptop->flags_supported & FLAG_DOCK))
return sprintf(buf, "unknown\n");
- if (fujitsu_hotkey->rfkill_state & 0x200)
+ if (fujitsu_laptop->flags_state & FLAG_DOCK)
return sprintf(buf, "docked\n");
else
return sprintf(buf, "undocked\n");
@@ -591,9 +592,9 @@ static ssize_t
show_radios_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (!(fujitsu_hotkey->rfkill_supported & 0x20))
+ if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL))
return sprintf(buf, "unknown\n");
- if (fujitsu_hotkey->rfkill_state & 0x20)
+ if (fujitsu_laptop->flags_state & FLAG_RFKILL)
return sprintf(buf, "on\n");
else
return sprintf(buf, "killed\n");
@@ -607,7 +608,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store);
static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store);
static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store);
-static struct attribute *fujitsupf_attributes[] = {
+static struct attribute *fujitsu_pf_attributes[] = {
&dev_attr_brightness_changed.attr,
&dev_attr_max_brightness.attr,
&dev_attr_lcd_level.attr,
@@ -617,11 +618,11 @@ static struct attribute *fujitsupf_attributes[] = {
NULL
};
-static struct attribute_group fujitsupf_attribute_group = {
- .attrs = fujitsupf_attributes
+static struct attribute_group fujitsu_pf_attribute_group = {
+ .attrs = fujitsu_pf_attributes
};
-static struct platform_driver fujitsupf_driver = {
+static struct platform_driver fujitsu_pf_driver = {
.driver = {
.name = "fujitsu-laptop",
}
@@ -630,39 +631,30 @@ static struct platform_driver fujitsupf_driver = {
static void __init dmi_check_cb_common(const struct dmi_system_id *id)
{
pr_info("Identified laptop model '%s'\n", id->ident);
- if (use_alt_lcd_levels == -1) {
- if (acpi_has_method(NULL,
- "\\_SB.PCI0.LPCB.FJEX.SBL2"))
- use_alt_lcd_levels = 1;
- else
- use_alt_lcd_levels = 0;
- vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as "
- "%i\n", use_alt_lcd_levels);
- }
}
static int __init dmi_check_cb_s6410(const struct dmi_system_id *id)
{
dmi_check_cb_common(id);
- fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */
- fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */
+ fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */
+ fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */
return 1;
}
static int __init dmi_check_cb_s6420(const struct dmi_system_id *id)
{
dmi_check_cb_common(id);
- fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */
- fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */
+ fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */
+ fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */
return 1;
}
static int __init dmi_check_cb_p8010(const struct dmi_system_id *id)
{
dmi_check_cb_common(id);
- fujitsu->keycode1 = KEY_HELP; /* "Support" */
- fujitsu->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */
- fujitsu->keycode4 = KEY_WWW; /* "Internet" */
+ fujitsu_bl->keycode1 = KEY_HELP; /* "Support" */
+ fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */
+ fujitsu_bl->keycode4 = KEY_WWW; /* "Internet" */
return 1;
}
@@ -693,7 +685,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = {
/* ACPI device for LCD brightness control */
-static int acpi_fujitsu_add(struct acpi_device *device)
+static int acpi_fujitsu_bl_add(struct acpi_device *device)
{
int state = 0;
struct input_dev *input;
@@ -702,22 +694,22 @@ static int acpi_fujitsu_add(struct acpi_device *device)
if (!device)
return -EINVAL;
- fujitsu->acpi_handle = device->handle;
- sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+ fujitsu_bl->acpi_handle = device->handle;
+ sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME);
sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
- device->driver_data = fujitsu;
+ device->driver_data = fujitsu_bl;
- fujitsu->input = input = input_allocate_device();
+ fujitsu_bl->input = input = input_allocate_device();
if (!input) {
error = -ENOMEM;
goto err_stop;
}
- snprintf(fujitsu->phys, sizeof(fujitsu->phys),
+ snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys),
"%s/video/input0", acpi_device_hid(device));
input->name = acpi_device_name(device);
- input->phys = fujitsu->phys;
+ input->phys = fujitsu_bl->phys;
input->id.bustype = BUS_HOST;
input->id.product = 0x06;
input->dev.parent = &device->dev;
@@ -730,7 +722,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
if (error)
goto err_free_input_dev;
- error = acpi_bus_update_power(fujitsu->acpi_handle, &state);
+ error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state);
if (error) {
pr_err("Error reading power state\n");
goto err_unregister_input_dev;
@@ -740,7 +732,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
acpi_device_name(device), acpi_device_bid(device),
!device->power.state ? "on" : "off");
- fujitsu->dev = device;
+ fujitsu_bl->dev = device;
if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -750,6 +742,15 @@ static int acpi_fujitsu_add(struct acpi_device *device)
pr_err("_INI Method failed\n");
}
+ if (use_alt_lcd_levels == -1) {
+ if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2"))
+ use_alt_lcd_levels = 1;
+ else
+ use_alt_lcd_levels = 0;
+ vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n",
+ use_alt_lcd_levels);
+ }
+
/* do config (detect defaults) */
use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
@@ -758,7 +759,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
use_alt_lcd_levels, disable_brightness_adjust);
if (get_max_brightness() <= 0)
- fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
+ fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS;
get_lcd_level();
return 0;
@@ -772,38 +773,38 @@ err_stop:
return error;
}
-static int acpi_fujitsu_remove(struct acpi_device *device)
+static int acpi_fujitsu_bl_remove(struct acpi_device *device)
{
- struct fujitsu_t *fujitsu = acpi_driver_data(device);
- struct input_dev *input = fujitsu->input;
+ struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device);
+ struct input_dev *input = fujitsu_bl->input;
input_unregister_device(input);
- fujitsu->acpi_handle = NULL;
+ fujitsu_bl->acpi_handle = NULL;
return 0;
}
/* Brightness notify */
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
{
struct input_dev *input;
int keycode;
int oldb, newb;
- input = fujitsu->input;
+ input = fujitsu_bl->input;
switch (event) {
case ACPI_FUJITSU_NOTIFY_CODE1:
keycode = 0;
- oldb = fujitsu->brightness_level;
+ oldb = fujitsu_bl->brightness_level;
get_lcd_level();
- newb = fujitsu->brightness_level;
+ newb = fujitsu_bl->brightness_level;
vdbg_printk(FUJLAPTOP_DBG_TRACE,
"brightness button event [%i -> %i (%i)]\n",
- oldb, newb, fujitsu->brightness_changed);
+ oldb, newb, fujitsu_bl->brightness_changed);
if (oldb < newb) {
if (disable_brightness_adjust != 1) {
@@ -840,7 +841,7 @@ static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
/* ACPI device for hotkey handling */
-static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
+static int acpi_fujitsu_laptop_add(struct acpi_device *device)
{
int result = 0;
int state = 0;
@@ -851,42 +852,42 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
if (!device)
return -EINVAL;
- fujitsu_hotkey->acpi_handle = device->handle;
+ fujitsu_laptop->acpi_handle = device->handle;
sprintf(acpi_device_name(device), "%s",
- ACPI_FUJITSU_HOTKEY_DEVICE_NAME);
+ ACPI_FUJITSU_LAPTOP_DEVICE_NAME);
sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
- device->driver_data = fujitsu_hotkey;
+ device->driver_data = fujitsu_laptop;
/* kfifo */
- spin_lock_init(&fujitsu_hotkey->fifo_lock);
- error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+ spin_lock_init(&fujitsu_laptop->fifo_lock);
+ error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int),
GFP_KERNEL);
if (error) {
pr_err("kfifo_alloc failed\n");
goto err_stop;
}
- fujitsu_hotkey->input = input = input_allocate_device();
+ fujitsu_laptop->input = input = input_allocate_device();
if (!input) {
error = -ENOMEM;
goto err_free_fifo;
}
- snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys),
+ snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys),
"%s/video/input0", acpi_device_hid(device));
input->name = acpi_device_name(device);
- input->phys = fujitsu_hotkey->phys;
+ input->phys = fujitsu_laptop->phys;
input->id.bustype = BUS_HOST;
input->id.product = 0x06;
input->dev.parent = &device->dev;
set_bit(EV_KEY, input->evbit);
- set_bit(fujitsu->keycode1, input->keybit);
- set_bit(fujitsu->keycode2, input->keybit);
- set_bit(fujitsu->keycode3, input->keybit);
- set_bit(fujitsu->keycode4, input->keybit);
- set_bit(fujitsu->keycode5, input->keybit);
+ set_bit(fujitsu_bl->keycode1, input->keybit);
+ set_bit(fujitsu_bl->keycode2, input->keybit);
+ set_bit(fujitsu_bl->keycode3, input->keybit);
+ set_bit(fujitsu_bl->keycode4, input->keybit);
+ set_bit(fujitsu_bl->keycode5, input->keybit);
set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit);
set_bit(KEY_UNKNOWN, input->keybit);
@@ -894,7 +895,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
if (error)
goto err_free_input_dev;
- error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state);
+ error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state);
if (error) {
pr_err("Error reading power state\n");
goto err_unregister_input_dev;
@@ -904,7 +905,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
acpi_device_name(device), acpi_device_bid(device),
!device->power.state ? "on" : "off");
- fujitsu_hotkey->dev = device;
+ fujitsu_laptop->dev = device;
if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -920,27 +921,27 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
; /* No action, result is discarded */
vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
- fujitsu_hotkey->rfkill_supported =
- call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0);
+ fujitsu_laptop->flags_supported =
+ call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0);
/* Make sure our bitmask of supported functions is cleared if the
RFKILL function block is not implemented, like on the S7020. */
- if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD)
- fujitsu_hotkey->rfkill_supported = 0;
+ if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD)
+ fujitsu_laptop->flags_supported = 0;
- if (fujitsu_hotkey->rfkill_supported)
- fujitsu_hotkey->rfkill_state =
- call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+ if (fujitsu_laptop->flags_supported)
+ fujitsu_laptop->flags_state =
+ call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
/* Suspect this is a keymap of the application panel, print it */
pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
#if IS_ENABLED(CONFIG_LEDS_CLASS)
if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&logolamp_led);
if (result == 0) {
- fujitsu_hotkey->logolamp_registered = 1;
+ fujitsu_laptop->logolamp_registered = 1;
} else {
pr_err("Could not register LED handler for logo lamp, error %i\n",
result);
@@ -949,10 +950,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
(call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&kblamps_led);
if (result == 0) {
- fujitsu_hotkey->kblamps_registered = 1;
+ fujitsu_laptop->kblamps_registered = 1;
} else {
pr_err("Could not register LED handler for keyboard lamps, error %i\n",
result);
@@ -966,10 +967,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
* that an RF LED is present.
*/
if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&radio_led);
if (result == 0) {
- fujitsu_hotkey->radio_led_registered = 1;
+ fujitsu_laptop->radio_led_registered = 1;
} else {
pr_err("Could not register LED handler for radio LED, error %i\n",
result);
@@ -983,10 +984,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
*/
if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) &&
(call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&eco_led);
if (result == 0) {
- fujitsu_hotkey->eco_led_registered = 1;
+ fujitsu_laptop->eco_led_registered = 1;
} else {
pr_err("Could not register LED handler for eco LED, error %i\n",
result);
@@ -1002,47 +1003,47 @@ err_unregister_input_dev:
err_free_input_dev:
input_free_device(input);
err_free_fifo:
- kfifo_free(&fujitsu_hotkey->fifo);
+ kfifo_free(&fujitsu_laptop->fifo);
err_stop:
return error;
}
-static int acpi_fujitsu_hotkey_remove(struct acpi_device *device)
+static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
{
- struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device);
- struct input_dev *input = fujitsu_hotkey->input;
+ struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device);
+ struct input_dev *input = fujitsu_laptop->input;
#if IS_ENABLED(CONFIG_LEDS_CLASS)
- if (fujitsu_hotkey->logolamp_registered)
+ if (fujitsu_laptop->logolamp_registered)
led_classdev_unregister(&logolamp_led);
- if (fujitsu_hotkey->kblamps_registered)
+ if (fujitsu_laptop->kblamps_registered)
led_classdev_unregister(&kblamps_led);
- if (fujitsu_hotkey->radio_led_registered)
+ if (fujitsu_laptop->radio_led_registered)
led_classdev_unregister(&radio_led);
- if (fujitsu_hotkey->eco_led_registered)
+ if (fujitsu_laptop->eco_led_registered)
led_classdev_unregister(&eco_led);
#endif
input_unregister_device(input);
- kfifo_free(&fujitsu_hotkey->fifo);
+ kfifo_free(&fujitsu_laptop->fifo);
- fujitsu_hotkey->acpi_handle = NULL;
+ fujitsu_laptop->acpi_handle = NULL;
return 0;
}
-static void acpi_fujitsu_hotkey_press(int keycode)
+static void acpi_fujitsu_laptop_press(int keycode)
{
- struct input_dev *input = fujitsu_hotkey->input;
+ struct input_dev *input = fujitsu_laptop->input;
int status;
- status = kfifo_in_locked(&fujitsu_hotkey->fifo,
+ status = kfifo_in_locked(&fujitsu_laptop->fifo,
(unsigned char *)&keycode, sizeof(keycode),
- &fujitsu_hotkey->fifo_lock);
+ &fujitsu_laptop->fifo_lock);
if (status != sizeof(keycode)) {
vdbg_printk(FUJLAPTOP_DBG_WARN,
"Could not push keycode [0x%x]\n", keycode);
@@ -1054,16 +1055,16 @@ static void acpi_fujitsu_hotkey_press(int keycode)
"Push keycode into ringbuffer [%d]\n", keycode);
}
-static void acpi_fujitsu_hotkey_release(void)
+static void acpi_fujitsu_laptop_release(void)
{
- struct input_dev *input = fujitsu_hotkey->input;
+ struct input_dev *input = fujitsu_laptop->input;
int keycode, status;
while (true) {
- status = kfifo_out_locked(&fujitsu_hotkey->fifo,
+ status = kfifo_out_locked(&fujitsu_laptop->fifo,
(unsigned char *)&keycode,
sizeof(keycode),
- &fujitsu_hotkey->fifo_lock);
+ &fujitsu_laptop->fifo_lock);
if (status != sizeof(keycode))
return;
input_report_key(input, keycode, 0);
@@ -1073,14 +1074,14 @@ static void acpi_fujitsu_hotkey_release(void)
}
}
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
{
struct input_dev *input;
int keycode;
unsigned int irb = 1;
int i;
- input = fujitsu_hotkey->input;
+ input = fujitsu_laptop->input;
if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
keycode = KEY_UNKNOWN;
@@ -1093,9 +1094,9 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
return;
}
- if (fujitsu_hotkey->rfkill_supported)
- fujitsu_hotkey->rfkill_state =
- call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+ if (fujitsu_laptop->flags_supported)
+ fujitsu_laptop->flags_state =
+ call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
i = 0;
while ((irb =
@@ -1103,19 +1104,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
&& (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
switch (irb & 0x4ff) {
case KEY1_CODE:
- keycode = fujitsu->keycode1;
+ keycode = fujitsu_bl->keycode1;
break;
case KEY2_CODE:
- keycode = fujitsu->keycode2;
+ keycode = fujitsu_bl->keycode2;
break;
case KEY3_CODE:
- keycode = fujitsu->keycode3;
+ keycode = fujitsu_bl->keycode3;
break;
case KEY4_CODE:
- keycode = fujitsu->keycode4;
+ keycode = fujitsu_bl->keycode4;
break;
case KEY5_CODE:
- keycode = fujitsu->keycode5;
+ keycode = fujitsu_bl->keycode5;
break;
case 0:
keycode = 0;
@@ -1128,17 +1129,17 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
}
if (keycode > 0)
- acpi_fujitsu_hotkey_press(keycode);
+ acpi_fujitsu_laptop_press(keycode);
else if (keycode == 0)
- acpi_fujitsu_hotkey_release();
+ acpi_fujitsu_laptop_release();
}
/* On some models (first seen on the Skylake-based Lifebook
* E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
- * handled in software; its state is queried using FUNC_RFKILL
+ * handled in software; its state is queried using FUNC_FLAGS
*/
- if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
- (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
+ if ((fujitsu_laptop->flags_supported & BIT(26)) &&
+ (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) {
keycode = KEY_TOUCHPAD_TOGGLE;
input_report_key(input, keycode, 1);
input_sync(input);
@@ -1150,83 +1151,81 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
/* Initialization */
-static const struct acpi_device_id fujitsu_device_ids[] = {
- {ACPI_FUJITSU_HID, 0},
+static const struct acpi_device_id fujitsu_bl_device_ids[] = {
+ {ACPI_FUJITSU_BL_HID, 0},
{"", 0},
};
-static struct acpi_driver acpi_fujitsu_driver = {
- .name = ACPI_FUJITSU_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_bl_driver = {
+ .name = ACPI_FUJITSU_BL_DRIVER_NAME,
.class = ACPI_FUJITSU_CLASS,
- .ids = fujitsu_device_ids,
+ .ids = fujitsu_bl_device_ids,
.ops = {
- .add = acpi_fujitsu_add,
- .remove = acpi_fujitsu_remove,
- .notify = acpi_fujitsu_notify,
+ .add = acpi_fujitsu_bl_add,
+ .remove = acpi_fujitsu_bl_remove,
+ .notify = acpi_fujitsu_bl_notify,
},
};
-static const struct acpi_device_id fujitsu_hotkey_device_ids[] = {
- {ACPI_FUJITSU_HOTKEY_HID, 0},
+static const struct acpi_device_id fujitsu_laptop_device_ids[] = {
+ {ACPI_FUJITSU_LAPTOP_HID, 0},
{"", 0},
};
-static struct acpi_driver acpi_fujitsu_hotkey_driver = {
- .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_laptop_driver = {
+ .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME,
.class = ACPI_FUJITSU_CLASS,
- .ids = fujitsu_hotkey_device_ids,
+ .ids = fujitsu_laptop_device_ids,
.ops = {
- .add = acpi_fujitsu_hotkey_add,
- .remove = acpi_fujitsu_hotkey_remove,
- .notify = acpi_fujitsu_hotkey_notify,
+ .add = acpi_fujitsu_laptop_add,
+ .remove = acpi_fujitsu_laptop_remove,
+ .notify = acpi_fujitsu_laptop_notify,
},
};
static const struct acpi_device_id fujitsu_ids[] __used = {
- {ACPI_FUJITSU_HID, 0},
- {ACPI_FUJITSU_HOTKEY_HID, 0},
+ {ACPI_FUJITSU_BL_HID, 0},
+ {ACPI_FUJITSU_LAPTOP_HID, 0},
{"", 0}
};
MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
static int __init fujitsu_init(void)
{
- int ret, result, max_brightness;
+ int ret, max_brightness;
if (acpi_disabled)
return -ENODEV;
- fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
- if (!fujitsu)
+ fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL);
+ if (!fujitsu_bl)
return -ENOMEM;
- fujitsu->keycode1 = KEY_PROG1;
- fujitsu->keycode2 = KEY_PROG2;
- fujitsu->keycode3 = KEY_PROG3;
- fujitsu->keycode4 = KEY_PROG4;
- fujitsu->keycode5 = KEY_RFKILL;
+ fujitsu_bl->keycode1 = KEY_PROG1;
+ fujitsu_bl->keycode2 = KEY_PROG2;
+ fujitsu_bl->keycode3 = KEY_PROG3;
+ fujitsu_bl->keycode4 = KEY_PROG4;
+ fujitsu_bl->keycode5 = KEY_RFKILL;
dmi_check_system(fujitsu_dmi_table);
- result = acpi_bus_register_driver(&acpi_fujitsu_driver);
- if (result < 0) {
- ret = -ENODEV;
+ ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
+ if (ret)
goto fail_acpi;
- }
/* Register platform stuff */
- fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
- if (!fujitsu->pf_device) {
+ fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+ if (!fujitsu_bl->pf_device) {
ret = -ENOMEM;
goto fail_platform_driver;
}
- ret = platform_device_add(fujitsu->pf_device);
+ ret = platform_device_add(fujitsu_bl->pf_device);
if (ret)
goto fail_platform_device1;
ret =
- sysfs_create_group(&fujitsu->pf_device->dev.kobj,
- &fujitsupf_attribute_group);
+ sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj,
+ &fujitsu_pf_attribute_group);
if (ret)
goto fail_platform_device2;
@@ -1236,90 +1235,88 @@ static int __init fujitsu_init(void)
struct backlight_properties props;
memset(&props, 0, sizeof(struct backlight_properties));
- max_brightness = fujitsu->max_brightness;
+ max_brightness = fujitsu_bl->max_brightness;
props.type = BACKLIGHT_PLATFORM;
props.max_brightness = max_brightness - 1;
- fujitsu->bl_device = backlight_device_register("fujitsu-laptop",
- NULL, NULL,
- &fujitsubl_ops,
- &props);
- if (IS_ERR(fujitsu->bl_device)) {
- ret = PTR_ERR(fujitsu->bl_device);
- fujitsu->bl_device = NULL;
+ fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop",
+ NULL, NULL,
+ &fujitsu_bl_ops,
+ &props);
+ if (IS_ERR(fujitsu_bl->bl_device)) {
+ ret = PTR_ERR(fujitsu_bl->bl_device);
+ fujitsu_bl->bl_device = NULL;
goto fail_sysfs_group;
}
- fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
+ fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level;
}
- ret = platform_driver_register(&fujitsupf_driver);
+ ret = platform_driver_register(&fujitsu_pf_driver);
if (ret)
goto fail_backlight;
- /* Register hotkey driver */
+ /* Register laptop driver */
- fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL);
- if (!fujitsu_hotkey) {
+ fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL);
+ if (!fujitsu_laptop) {
ret = -ENOMEM;
- goto fail_hotkey;
+ goto fail_laptop;
}
- result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver);
- if (result < 0) {
- ret = -ENODEV;
- goto fail_hotkey1;
- }
+ ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
+ if (ret)
+ goto fail_laptop1;
/* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
if (acpi_video_get_backlight_type() == acpi_backlight_vendor) {
if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
- fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN;
+ fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
else
- fujitsu->bl_device->props.power = FB_BLANK_UNBLANK;
+ fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
}
pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n");
return 0;
-fail_hotkey1:
- kfree(fujitsu_hotkey);
-fail_hotkey:
- platform_driver_unregister(&fujitsupf_driver);
+fail_laptop1:
+ kfree(fujitsu_laptop);
+fail_laptop:
+ platform_driver_unregister(&fujitsu_pf_driver);
fail_backlight:
- backlight_device_unregister(fujitsu->bl_device);
+ backlight_device_unregister(fujitsu_bl->bl_device);
fail_sysfs_group:
- sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
- &fujitsupf_attribute_group);
+ sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+ &fujitsu_pf_attribute_group);
fail_platform_device2:
- platform_device_del(fujitsu->pf_device);
+ platform_device_del(fujitsu_bl->pf_device);
fail_platform_device1:
- platform_device_put(fujitsu->pf_device);
+ platform_device_put(fujitsu_bl->pf_device);
fail_platform_driver:
- acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+ acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
fail_acpi:
- kfree(fujitsu);
+ kfree(fujitsu_bl);
return ret;
}
static void __exit fujitsu_cleanup(void)
{
- acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+ acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver);
- kfree(fujitsu_hotkey);
+ kfree(fujitsu_laptop);
- platform_driver_unregister(&fujitsupf_driver);
+ platform_driver_unregister(&fujitsu_pf_driver);
- backlight_device_unregister(fujitsu->bl_device);
+ backlight_device_unregister(fujitsu_bl->bl_device);
- sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
- &fujitsupf_attribute_group);
+ sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+ &fujitsu_pf_attribute_group);
- platform_device_unregister(fujitsu->pf_device);
+ platform_device_unregister(fujitsu_bl->pf_device);
- acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+ acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
- kfree(fujitsu);
+ kfree(fujitsu_bl);
pr_info("driver unloaded\n");
}
@@ -1341,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon");
MODULE_DESCRIPTION("Fujitsu laptop extras support");
MODULE_VERSION(FUJITSU_DRIVER_VERSION);
MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*");
-MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
index 09b4df74291e..bb865695d7a6 100644
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -193,10 +193,7 @@ static int __init ptp_kvm_init(void)
kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
- if (IS_ERR(kvm_ptp_clock.ptp_clock))
- return PTR_ERR(kvm_ptp_clock.ptp_clock);
-
- return 0;
+ return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock);
}
module_init(ptp_kvm_init);
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 65f86bc24c07..1dc43fc5f65f 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -76,7 +76,7 @@ config QCOM_ADSP_PIL
depends on OF && ARCH_QCOM
depends on REMOTEPROC
depends on QCOM_SMEM
- depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+ depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
select MFD_SYSCON
select QCOM_MDT_LOADER
select QCOM_RPROC_COMMON
@@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL
depends on OF && ARCH_QCOM
depends on QCOM_SMEM
depends on REMOTEPROC
- depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+ depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
select MFD_SYSCON
select QCOM_RPROC_COMMON
select QCOM_SCM
@@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL
config QCOM_WCNSS_PIL
tristate "Qualcomm WCNSS Peripheral Image Loader"
depends on OF && ARCH_QCOM
- depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+ depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
depends on QCOM_SMEM
depends on REMOTEPROC
select QCOM_MDT_LOADER
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 230043c1c90f..3c52867dfe28 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1241,16 +1241,15 @@ config SCSI_LPFC
tristate "Emulex LightPulse Fibre Channel Support"
depends on PCI && SCSI
depends on SCSI_FC_ATTRS
- depends on NVME_FC && NVME_TARGET_FC
select CRC_T10DIF
- help
+ ---help---
This lpfc driver supports the Emulex LightPulse
Family of Fibre Channel PCI host adapters.
config SCSI_LPFC_DEBUG_FS
bool "Emulex LightPulse Fibre Channel debugfs Support"
depends on SCSI_LPFC && DEBUG_FS
- help
+ ---help---
This makes debugging information from the lpfc driver
available via the debugfs filesystem.
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 2e5338dec621..7b0410e0f569 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -468,7 +468,7 @@ err_out:
return -1;
err_blink:
- return (status > 16) & 0xFF;
+ return (status >> 16) & 0xFF;
}
static inline u32 aac_get_vector(struct aac_dev *dev)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 524a0c755ed7..0d0be7754a65 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
/* fill_cmd can't fail here, no data buffer to map. */
(void) fill_cmd(c, reset_type, h, NULL, 0, 0,
scsi3addr, TYPE_MSG);
- rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT);
+ rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
if (rc) {
dev_warn(&h->pdev->dev, "Failed to send reset command\n");
goto out;
@@ -3714,7 +3714,7 @@ exit_failed:
* # (integer code indicating one of several NOT READY states
* describing why a volume is to be kept offline)
*/
-static int hpsa_volume_offline(struct ctlr_info *h,
+static unsigned char hpsa_volume_offline(struct ctlr_info *h,
unsigned char scsi3addr[])
{
struct CommandList *c;
@@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
DEFAULT_TIMEOUT);
if (rc) {
cmd_free(h, c);
- return 0;
+ return HPSA_VPD_LV_STATUS_UNSUPPORTED;
}
sense = c->err_info->SenseInfo;
if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h,
cmd_status = c->err_info->CommandStatus;
scsi_status = c->err_info->ScsiStatus;
cmd_free(h, c);
- /* Is the volume 'not ready'? */
- if (cmd_status != CMD_TARGET_STATUS ||
- scsi_status != SAM_STAT_CHECK_CONDITION ||
- sense_key != NOT_READY ||
- asc != ASC_LUN_NOT_READY) {
- return 0;
- }
/* Determine the reason for not ready state */
ldstat = hpsa_get_volume_status(h, scsi3addr);
/* Keep volume offline in certain cases: */
switch (ldstat) {
+ case HPSA_LV_FAILED:
case HPSA_LV_UNDERGOING_ERASE:
case HPSA_LV_NOT_AVAILABLE:
case HPSA_LV_UNDERGOING_RPI:
@@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
default:
break;
}
- return 0;
+ return HPSA_LV_OK;
}
/*
@@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
/* Do an inquiry to the device to see what it is. */
if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
- /* Inquiry failed (msg printed already) */
dev_err(&h->pdev->dev,
- "hpsa_update_device_info: inquiry failed\n");
- rc = -EIO;
+ "%s: inquiry failed, device will be skipped.\n",
+ __func__);
+ rc = HPSA_INQUIRY_FAILED;
goto bail_out;
}
@@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h,
if ((this_device->devtype == TYPE_DISK ||
this_device->devtype == TYPE_ZBC) &&
is_logical_dev_addr_mode(scsi3addr)) {
- int volume_offline;
+ unsigned char volume_offline;
hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
hpsa_get_ioaccel_status(h, scsi3addr, this_device);
volume_offline = hpsa_volume_offline(h, scsi3addr);
- if (volume_offline < 0 || volume_offline > 0xff)
- volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
- this_device->volume_offline = volume_offline & 0xff;
+ if (volume_offline == HPSA_LV_FAILED) {
+ rc = HPSA_LV_FAILED;
+ dev_err(&h->pdev->dev,
+ "%s: LV failed, device will be skipped.\n",
+ __func__);
+ goto bail_out;
+ }
} else {
this_device->raid_level = RAID_UNKNOWN;
this_device->offload_config = 0;
@@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
goto out;
}
if (rc) {
- dev_warn(&h->pdev->dev,
- "Inquiry failed, skipping device.\n");
+ h->drv_req_rescan = 1;
continue;
}
@@ -5558,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h)
spin_lock_irqsave(&h->scan_lock, flags);
h->scan_finished = 1;
- wake_up_all(&h->scan_wait_queue);
+ wake_up(&h->scan_wait_queue);
spin_unlock_irqrestore(&h->scan_lock, flags);
}
@@ -5576,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
if (unlikely(lockup_detected(h)))
return hpsa_scan_complete(h);
+ /*
+ * If a scan is already waiting to run, no need to add another
+ */
+ spin_lock_irqsave(&h->scan_lock, flags);
+ if (h->scan_waiting) {
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+
/* wait until any scan already in progress is finished. */
while (1) {
spin_lock_irqsave(&h->scan_lock, flags);
if (h->scan_finished)
break;
+ h->scan_waiting = 1;
spin_unlock_irqrestore(&h->scan_lock, flags);
wait_event(h->scan_wait_queue, h->scan_finished);
/* Note: We don't need to worry about a race between this
@@ -5590,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
*/
}
h->scan_finished = 0; /* mark scan as in progress */
+ h->scan_waiting = 0;
spin_unlock_irqrestore(&h->scan_lock, flags);
if (unlikely(lockup_detected(h)))
@@ -8792,6 +8802,7 @@ reinit_after_soft_reset:
init_waitqueue_head(&h->event_sync_wait_queue);
mutex_init(&h->reset_mutex);
h->scan_finished = 1; /* no scan currently in progress */
+ h->scan_waiting = 0;
pci_set_drvdata(pdev, h);
h->ndevices = 0;
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index bf6cdc106654..6f04f2ad4125 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -201,6 +201,7 @@ struct ctlr_info {
dma_addr_t errinfo_pool_dhandle;
unsigned long *cmd_pool_bits;
int scan_finished;
+ u8 scan_waiting : 1;
spinlock_t scan_lock;
wait_queue_head_t scan_wait_queue;
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index a584cdf07058..5961705eef76 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -156,6 +156,7 @@
#define CFGTBL_BusType_Fibre2G 0x00000200l
/* VPD Inquiry types */
+#define HPSA_INQUIRY_FAILED 0x02
#define HPSA_VPD_SUPPORTED_PAGES 0x00
#define HPSA_VPD_LV_DEVICE_ID 0x83
#define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1
@@ -166,6 +167,7 @@
/* Logical volume states */
#define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff
#define HPSA_LV_OK 0x0
+#define HPSA_LV_FAILED 0x01
#define HPSA_LV_NOT_AVAILABLE 0x0b
#define HPSA_LV_UNDERGOING_ERASE 0x0F
#define HPSA_LV_UNDERGOING_RPI 0x12
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 07c08ce68d70..894b1e3ebd56 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -561,8 +561,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
task->state = state;
- if (!list_empty(&task->running))
+ spin_lock_bh(&conn->taskqueuelock);
+ if (!list_empty(&task->running)) {
+ pr_debug_once("%s while task on list", __func__);
list_del_init(&task->running);
+ }
+ spin_unlock_bh(&conn->taskqueuelock);
if (conn->task == task)
conn->task = NULL;
@@ -784,7 +788,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
if (session->tt->xmit_task(task))
goto free_task;
} else {
+ spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&task->running, &conn->mgmtqueue);
+ spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
@@ -1475,8 +1481,10 @@ void iscsi_requeue_task(struct iscsi_task *task)
* this may be on the requeue list already if the xmit_task callout
* is handling the r2ts while we are adding new ones
*/
+ spin_lock_bh(&conn->taskqueuelock);
if (list_empty(&task->running))
list_add_tail(&task->running, &conn->requeue);
+ spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1513,22 +1521,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
* only have one nop-out as a ping from us and targets should not
* overflow us with nop-ins
*/
+ spin_lock_bh(&conn->taskqueuelock);
check_mgmt:
while (!list_empty(&conn->mgmtqueue)) {
conn->task = list_entry(conn->mgmtqueue.next,
struct iscsi_task, running);
list_del_init(&conn->task->running);
+ spin_unlock_bh(&conn->taskqueuelock);
if (iscsi_prep_mgmt_task(conn, conn->task)) {
/* regular RX path uses back_lock */
spin_lock_bh(&conn->session->back_lock);
__iscsi_put_task(conn->task);
spin_unlock_bh(&conn->session->back_lock);
conn->task = NULL;
+ spin_lock_bh(&conn->taskqueuelock);
continue;
}
rc = iscsi_xmit_task(conn);
if (rc)
goto done;
+ spin_lock_bh(&conn->taskqueuelock);
}
/* process pending command queue */
@@ -1536,19 +1548,24 @@ check_mgmt:
conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
running);
list_del_init(&conn->task->running);
+ spin_unlock_bh(&conn->taskqueuelock);
if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
fail_scsi_task(conn->task, DID_IMM_RETRY);
+ spin_lock_bh(&conn->taskqueuelock);
continue;
}
rc = iscsi_prep_scsi_cmd_pdu(conn->task);
if (rc) {
if (rc == -ENOMEM || rc == -EACCES) {
+ spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&conn->task->running,
&conn->cmdqueue);
conn->task = NULL;
+ spin_unlock_bh(&conn->taskqueuelock);
goto done;
} else
fail_scsi_task(conn->task, DID_ABORT);
+ spin_lock_bh(&conn->taskqueuelock);
continue;
}
rc = iscsi_xmit_task(conn);
@@ -1559,6 +1576,7 @@ check_mgmt:
* we need to check the mgmt queue for nops that need to
* be sent to aviod starvation
*/
+ spin_lock_bh(&conn->taskqueuelock);
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
@@ -1578,12 +1596,15 @@ check_mgmt:
conn->task = task;
list_del_init(&conn->task->running);
conn->task->state = ISCSI_TASK_RUNNING;
+ spin_unlock_bh(&conn->taskqueuelock);
rc = iscsi_xmit_task(conn);
if (rc)
goto done;
+ spin_lock_bh(&conn->taskqueuelock);
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
+ spin_unlock_bh(&conn->taskqueuelock);
spin_unlock_bh(&conn->session->frwd_lock);
return -ENODATA;
@@ -1739,7 +1760,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
goto prepd_reject;
}
} else {
+ spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&task->running, &conn->cmdqueue);
+ spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
@@ -2897,6 +2920,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
INIT_LIST_HEAD(&conn->mgmtqueue);
INIT_LIST_HEAD(&conn->cmdqueue);
INIT_LIST_HEAD(&conn->requeue);
+ spin_lock_init(&conn->taskqueuelock);
INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
/* allocate login_task used for the login/text sequences */
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 0bba2e30b4f0..257bbdd0f0b8 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -99,12 +99,13 @@ struct lpfc_sli2_slim;
#define FC_MAX_ADPTMSG 64
#define MAX_HBAEVT 32
+#define MAX_HBAS_NO_RESET 16
/* Number of MSI-X vectors the driver uses */
#define LPFC_MSIX_VECTORS 2
/* lpfc wait event data ready flag */
-#define LPFC_DATA_READY (1<<0)
+#define LPFC_DATA_READY 0 /* bit 0 */
/* queue dump line buffer size */
#define LPFC_LBUF_SZ 128
@@ -692,6 +693,7 @@ struct lpfc_hba {
* capability
*/
#define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */
+#define NVME_XRI_ABORT_EVENT 0x100000
uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
struct lpfc_dmabuf slim2p;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 5c783ef7f260..22819afbaef5 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3010,6 +3010,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
lpfc_poll_show, lpfc_poll_store);
+int lpfc_no_hba_reset_cnt;
+unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444);
+MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset");
+
LPFC_ATTR(sli_mode, 0, 0, 3,
"SLI mode selector:"
" 0 - auto (SLI-3 if supported),"
@@ -3309,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
* lpfc_enable_fc4_type: Defines what FC4 types are supported.
* Supported Values: 1 - register just FCP
* 3 - register both FCP and NVME
- * Supported values are [1,3]. Default value is 3
+ * Supported values are [1,3]. Default value is 1
*/
-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
+LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
"Define fc4 type to register with fabric.");
@@ -4451,7 +4457,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
phba->cfg_fcp_imax = (uint32_t)val;
- for (i = 0; i < phba->io_channel_irqs; i++)
+
+ for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, i);
return strlen(buf);
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 843dd73004da..54e6ac42fbcd 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -384,7 +384,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *);
extern struct device_attribute *lpfc_hba_attrs[];
extern struct device_attribute *lpfc_vport_attrs[];
extern struct scsi_host_template lpfc_template;
-extern struct scsi_host_template lpfc_template_s3;
+extern struct scsi_host_template lpfc_template_no_hr;
extern struct scsi_host_template lpfc_template_nvme;
extern struct scsi_host_template lpfc_vport_template;
extern struct fc_function_template lpfc_transport_functions;
@@ -554,3 +554,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_wcqe_complete *abts_cmpl);
extern int lpfc_enable_nvmet_cnt;
extern unsigned long long lpfc_enable_nvmet[];
+extern int lpfc_no_hba_reset_cnt;
+extern unsigned long lpfc_no_hba_reset[];
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index c22bb3f887e1..d3e9af983015 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -939,8 +939,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
"FC4 x%08x, Data: x%08x x%08x\n",
ndlp, did, ndlp->nlp_fc4_type,
FC_TYPE_FCP, FC_TYPE_NVME);
+ ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
}
- ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
lpfc_issue_els_prli(vport, ndlp, 0);
} else
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 9f4798e9d938..913eed822cb8 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -3653,17 +3653,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
idiag.ptr_private = phba->sli4_hba.nvmels_cq;
goto pass_check;
}
- /* NVME LS complete queue */
- if (phba->sli4_hba.nvmels_cq &&
- phba->sli4_hba.nvmels_cq->queue_id == queid) {
- /* Sanity check */
- rc = lpfc_idiag_que_param_check(
- phba->sli4_hba.nvmels_cq, index, count);
- if (rc)
- goto error_out;
- idiag.ptr_private = phba->sli4_hba.nvmels_cq;
- goto pass_check;
- }
/* FCP complete queue */
if (phba->sli4_hba.fcp_cq) {
for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
@@ -3738,17 +3727,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
idiag.ptr_private = phba->sli4_hba.nvmels_wq;
goto pass_check;
}
- /* NVME LS work queue */
- if (phba->sli4_hba.nvmels_wq &&
- phba->sli4_hba.nvmels_wq->queue_id == queid) {
- /* Sanity check */
- rc = lpfc_idiag_que_param_check(
- phba->sli4_hba.nvmels_wq, index, count);
- if (rc)
- goto error_out;
- idiag.ptr_private = phba->sli4_hba.nvmels_wq;
- goto pass_check;
- }
/* FCP work queue */
if (phba->sli4_hba.fcp_wq) {
for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 2d26440e6f2f..d9c61d030034 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5177,15 +5177,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
static uint32_t
lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc,
- struct lpfc_hba *phba)
+ struct lpfc_vport *vport)
{
desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
- memcpy(desc->port_names.wwnn, phba->wwnn,
+ memcpy(desc->port_names.wwnn, &vport->fc_nodename,
sizeof(desc->port_names.wwnn));
- memcpy(desc->port_names.wwpn, phba->wwpn,
+ memcpy(desc->port_names.wwpn, &vport->fc_portname,
sizeof(desc->port_names.wwpn));
desc->length = cpu_to_be32(sizeof(desc->port_names));
@@ -5279,7 +5279,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context,
len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *)
(len + pcmd), &rdp_context->link_stat);
len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *)
- (len + pcmd), phba);
+ (len + pcmd), vport);
len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *)
(len + pcmd), vport, ndlp);
len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd),
@@ -8371,11 +8371,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
spin_lock_irq(shost->host_lock);
vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
spin_unlock_irq(shost->host_lock);
- if (vport->port_type == LPFC_PHYSICAL_PORT
- && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
- lpfc_issue_init_vfi(vport);
- else
+ if (mb->mbxStatus == MBX_NOT_FINISHED)
+ break;
+ if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+ !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ lpfc_issue_init_vfi(vport);
+ else
+ lpfc_initial_flogi(vport);
+ } else {
lpfc_initial_fdisc(vport);
+ }
break;
}
} else {
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 194a14d5f8a9..180b072beef6 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
ndlp->nlp_state, ndlp->nlp_rpi);
}
- if (!(vport->load_flag & FC_UNLOADING) &&
- !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+ if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
!(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
(ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) &&
@@ -641,6 +640,8 @@ lpfc_work_done(struct lpfc_hba *phba)
lpfc_handle_rrq_active(phba);
if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
lpfc_sli4_fcp_xri_abort_event_proc(phba);
+ if (phba->hba_flag & NVME_XRI_ABORT_EVENT)
+ lpfc_sli4_nvme_xri_abort_event_proc(phba);
if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
lpfc_sli4_els_xri_abort_event_proc(phba);
if (phba->hba_flag & ASYNC_EVENT)
@@ -2173,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
uint32_t boot_flag, addr_mode;
uint16_t fcf_index, next_fcf_index;
struct lpfc_fcf_rec *fcf_rec = NULL;
- uint16_t vlan_id;
+ uint16_t vlan_id = LPFC_FCOE_NULL_VID;
bool select_new_fcf;
int rc;
@@ -4020,9 +4021,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
rdata = rport->dd_data;
/* break the link before dropping the ref */
ndlp->rport = NULL;
- if (rdata && rdata->pnode == ndlp)
- lpfc_nlp_put(ndlp);
- rdata->pnode = NULL;
+ if (rdata) {
+ if (rdata->pnode == ndlp)
+ lpfc_nlp_put(ndlp);
+ rdata->pnode = NULL;
+ }
/* drop reference for earlier registeration */
put_device(&rport->dev);
}
@@ -4344,9 +4347,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
{
INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
- init_timer(&ndlp->nlp_delayfunc);
- ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
- ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+ setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay,
+ (unsigned long)ndlp);
ndlp->nlp_DID = did;
ndlp->vport = vport;
ndlp->phba = vport->phba;
@@ -4606,9 +4608,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba,
pring = qp->pring;
if (!pring)
continue;
- spin_lock_irq(&pring->ring_lock);
+ spin_lock(&pring->ring_lock);
__lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list);
- spin_unlock_irq(&pring->ring_lock);
+ spin_unlock(&pring->ring_lock);
}
spin_unlock_irq(&phba->hbalock);
}
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index cfdb068a3bfc..15277705cb6b 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1001,7 +1001,7 @@ struct eq_delay_info {
uint32_t phase;
uint32_t delay_multi;
};
-#define LPFC_MAX_EQ_DELAY 8
+#define LPFC_MAX_EQ_DELAY_EQID_CNT 8
struct sgl_page_pairs {
uint32_t sgl_pg0_addr_lo;
@@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay {
union {
struct {
uint32_t num_eq;
- struct eq_delay_info eq[LPFC_MAX_EQ_DELAY];
+ struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT];
} request;
struct {
uint32_t word0;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 0ee429d773f3..6cc561b04211 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3555,6 +3555,44 @@ out_free_mem:
return rc;
}
+static uint64_t
+lpfc_get_wwpn(struct lpfc_hba *phba)
+{
+ uint64_t wwn;
+ int rc;
+ LPFC_MBOXQ_t *mboxq;
+ MAILBOX_t *mb;
+
+
+ mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+ GFP_KERNEL);
+ if (!mboxq)
+ return (uint64_t)-1;
+
+ /* First get WWN of HBA instance */
+ lpfc_read_nv(phba, mboxq);
+ rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+ if (rc != MBX_SUCCESS) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "6019 Mailbox failed , mbxCmd x%x "
+ "READ_NV, mbxStatus x%x\n",
+ bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+ bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+ mempool_free(mboxq, phba->mbox_mem_pool);
+ return (uint64_t) -1;
+ }
+ mb = &mboxq->u.mb;
+ memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t));
+ /* wwn is WWPN of HBA instance */
+ mempool_free(mboxq, phba->mbox_mem_pool);
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ return be64_to_cpu(wwn);
+ else
+ return (((wwn & 0xffffffff00000000) >> 32) |
+ ((wwn & 0x00000000ffffffff) << 32));
+
+}
+
/**
* lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping
* @phba: pointer to lpfc hba data structure.
@@ -3676,17 +3714,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
struct lpfc_vport *vport;
struct Scsi_Host *shost = NULL;
int error = 0;
+ int i;
+ uint64_t wwn;
+ bool use_no_reset_hba = false;
+
+ wwn = lpfc_get_wwpn(phba);
+
+ for (i = 0; i < lpfc_no_hba_reset_cnt; i++) {
+ if (wwn == lpfc_no_hba_reset[i]) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "6020 Setting use_no_reset port=%llx\n",
+ wwn);
+ use_no_reset_hba = true;
+ break;
+ }
+ }
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
if (dev != &phba->pcidev->dev) {
shost = scsi_host_alloc(&lpfc_vport_template,
sizeof(struct lpfc_vport));
} else {
- if (phba->sli_rev == LPFC_SLI_REV4)
+ if (!use_no_reset_hba)
shost = scsi_host_alloc(&lpfc_template,
sizeof(struct lpfc_vport));
else
- shost = scsi_host_alloc(&lpfc_template_s3,
+ shost = scsi_host_alloc(&lpfc_template_no_hr,
sizeof(struct lpfc_vport));
}
} else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
@@ -3734,17 +3787,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
INIT_LIST_HEAD(&vport->rcv_buffer_list);
spin_lock_init(&vport->work_port_lock);
- init_timer(&vport->fc_disctmo);
- vport->fc_disctmo.function = lpfc_disc_timeout;
- vport->fc_disctmo.data = (unsigned long)vport;
+ setup_timer(&vport->fc_disctmo, lpfc_disc_timeout,
+ (unsigned long)vport);
- init_timer(&vport->els_tmofunc);
- vport->els_tmofunc.function = lpfc_els_timeout;
- vport->els_tmofunc.data = (unsigned long)vport;
+ setup_timer(&vport->els_tmofunc, lpfc_els_timeout,
+ (unsigned long)vport);
- init_timer(&vport->delayed_disc_tmo);
- vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo;
- vport->delayed_disc_tmo.data = (unsigned long)vport;
+ setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo,
+ (unsigned long)vport);
error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
if (error)
@@ -5406,21 +5456,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
INIT_LIST_HEAD(&phba->luns);
/* MBOX heartbeat timer */
- init_timer(&psli->mbox_tmo);
- psli->mbox_tmo.function = lpfc_mbox_timeout;
- psli->mbox_tmo.data = (unsigned long) phba;
+ setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba);
/* Fabric block timer */
- init_timer(&phba->fabric_block_timer);
- phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
- phba->fabric_block_timer.data = (unsigned long) phba;
+ setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout,
+ (unsigned long)phba);
/* EA polling mode timer */
- init_timer(&phba->eratt_poll);
- phba->eratt_poll.function = lpfc_poll_eratt;
- phba->eratt_poll.data = (unsigned long) phba;
+ setup_timer(&phba->eratt_poll, lpfc_poll_eratt,
+ (unsigned long)phba);
/* Heartbeat timer */
- init_timer(&phba->hb_tmofunc);
- phba->hb_tmofunc.function = lpfc_hb_timeout;
- phba->hb_tmofunc.data = (unsigned long)phba;
+ setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba);
return 0;
}
@@ -5446,9 +5490,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
*/
/* FCP polling mode timer */
- init_timer(&phba->fcp_poll_timer);
- phba->fcp_poll_timer.function = lpfc_poll_timeout;
- phba->fcp_poll_timer.data = (unsigned long) phba;
+ setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout,
+ (unsigned long)phba);
/* Host attention work mask setup */
phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
@@ -5482,7 +5525,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
/* Initialize the host templates the configured values. */
lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
- lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
/* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
if (phba->cfg_enable_bg) {
@@ -5617,14 +5661,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
* Initialize timers used by driver
*/
- init_timer(&phba->rrq_tmr);
- phba->rrq_tmr.function = lpfc_rrq_timeout;
- phba->rrq_tmr.data = (unsigned long)phba;
+ setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba);
/* FCF rediscover timer */
- init_timer(&phba->fcf.redisc_wait);
- phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo;
- phba->fcf.redisc_wait.data = (unsigned long)phba;
+ setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo,
+ (unsigned long)phba);
/*
* Control structure for handling external multi-buffer mailbox
@@ -5706,6 +5747,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
/* Initialize the host templates with the updated values. */
lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
@@ -5736,6 +5778,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
/* Initialize the Abort nvme buffer list used by driver */
spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+ /* Fast-path XRI aborted CQ Event work queue list */
+ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
}
/* This abort list used by worker thread */
@@ -5847,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
/* Check to see if it matches any module parameter */
for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
if (wwn == lpfc_enable_nvmet[i]) {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6017 NVME Target %016llx\n",
wwn);
phba->nvmet_support = 1; /* a match */
+#else
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "6021 Can't enable NVME Target."
+ " NVME_TARGET_FC infrastructure"
+ " is not in kernel\n");
+#endif
}
}
}
@@ -8712,12 +8763,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
}
}
- /*
- * Configure EQ delay multipier for interrupt coalescing using
- * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time.
- */
- for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY)
+ for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, qidx);
+
return 0;
out_destroy:
@@ -8973,6 +9021,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
/* Pending ELS XRI abort events */
list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
&cqelist);
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ /* Pending NVME XRI abort events */
+ list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+ &cqelist);
+ }
/* Pending asynnc events */
list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
&cqelist);
@@ -10400,12 +10453,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
fc_remove_host(shost);
scsi_remove_host(shost);
- /* Perform ndlp cleanup on the physical port. The nvme and nvmet
- * localports are destroyed after to cleanup all transport memory.
- */
lpfc_cleanup(vport);
- lpfc_nvmet_destroy_targetport(phba);
- lpfc_nvme_destroy_localport(vport);
/*
* Bring down the SLI Layer. This step disable all interrupts,
@@ -12018,6 +12066,7 @@ static struct pci_driver lpfc_driver = {
.id_table = lpfc_id_table,
.probe = lpfc_pci_probe_one,
.remove = lpfc_pci_remove_one,
+ .shutdown = lpfc_pci_remove_one,
.suspend = lpfc_pci_suspend_one,
.resume = lpfc_pci_resume_one,
.err_handler = &lpfc_err_handler,
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index c61d8d692ede..5986c7957199 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
}
dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
- dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
if (!dma_buf->iocbq) {
kfree(dma_buf->context);
pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
@@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
"2621 Ran out of nvmet iocb/WQEs\n");
return NULL;
}
+ dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
nvmewqe = dma_buf->iocbq;
wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
/* Initialize WQE */
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 609a908ea9db..0024de1c6c1f 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0);
bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1);
bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1);
- bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL);
+ bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ);
bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME);
/* Word 6 */
@@ -620,15 +620,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
* Embed the payload in the last half of the WQE
* WQE words 16-30 get the NVME CMD IU payload
*
- * WQE Word 16 is already setup with flags
- * WQE words 17-19 get payload Words 2-4
+ * WQE words 16-19 get payload Words 1-4
* WQE words 20-21 get payload Words 6-7
* WQE words 22-29 get payload Words 16-23
*/
- wptr = &wqe->words[17]; /* WQE ptr */
+ wptr = &wqe->words[16]; /* WQE ptr */
dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */
- dptr += 2; /* Skip Words 0-1 in payload */
+ dptr++; /* Skip Word 0 in payload */
+ *wptr++ = *dptr++; /* Word 1 */
*wptr++ = *dptr++; /* Word 2 */
*wptr++ = *dptr++; /* Word 3 */
*wptr++ = *dptr++; /* Word 4 */
@@ -978,9 +978,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
NVME_WRITE_CMD);
- /* Word 16 */
- wqe->words[16] = LPFC_NVME_EMBED_WRITE;
-
phba->fc4NvmeOutputRequests++;
} else {
/* Word 7 */
@@ -1002,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
NVME_READ_CMD);
- /* Word 16 */
- wqe->words[16] = LPFC_NVME_EMBED_READ;
-
phba->fc4NvmeInputRequests++;
}
} else {
@@ -1026,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
/* Word 11 */
bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD);
- /* Word 16 */
- wqe->words[16] = LPFC_NVME_EMBED_CMD;
-
phba->fc4NvmeControlRequests++;
}
/*
@@ -1286,6 +1277,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
pnvme_fcreq->private = (void *)lpfc_ncmd;
lpfc_ncmd->nvmeCmd = pnvme_fcreq;
lpfc_ncmd->nrport = rport;
+ lpfc_ncmd->ndlp = ndlp;
lpfc_ncmd->start_time = jiffies;
lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp);
@@ -1319,7 +1311,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
"sid: x%x did: x%x oxid: x%x\n",
ret, vport->fc_myDID, ndlp->nlp_DID,
lpfc_ncmd->cur_iocbq.sli4_xritag);
- ret = -EINVAL;
+ ret = -EBUSY;
goto out_free_nvme_buf;
}
@@ -1821,10 +1813,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
pdma_phys_sgl1, cur_xritag);
if (status) {
/* failure, put on abort nvme list */
- lpfc_ncmd->exch_busy = 1;
+ lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
} else {
/* success, put on NVME buffer list */
- lpfc_ncmd->exch_busy = 0;
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
lpfc_ncmd->status = IOSTAT_SUCCESS;
num_posted++;
}
@@ -1854,10 +1846,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
struct lpfc_nvme_buf, list);
if (status) {
/* failure, put on abort nvme list */
- lpfc_ncmd->exch_busy = 1;
+ lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
} else {
/* success, put on NVME buffer list */
- lpfc_ncmd->exch_busy = 0;
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
lpfc_ncmd->status = IOSTAT_SUCCESS;
num_posted++;
}
@@ -2099,7 +2091,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
unsigned long iflag = 0;
lpfc_ncmd->nonsg_phys = 0;
- if (lpfc_ncmd->exch_busy) {
+ if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
iflag);
lpfc_ncmd->nvmeCmd = NULL;
@@ -2135,11 +2127,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
int
lpfc_nvme_create_localport(struct lpfc_vport *vport)
{
+ int ret = 0;
struct lpfc_hba *phba = vport->phba;
struct nvme_fc_port_info nfcp_info;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
- int len, ret = 0;
+ int len;
/* Initialize this localport instance. The vport wwn usage ensures
* that NPIV is accounted for.
@@ -2156,8 +2149,12 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
/* localport is allocated from the stack, but the registration
* call allocates heap memory as well as the private area.
*/
+#if (IS_ENABLED(CONFIG_NVME_FC))
ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
&vport->phba->pcidev->dev, &localport);
+#else
+ ret = -ENOMEM;
+#endif
if (!ret) {
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
"6005 Successfully registered local "
@@ -2173,10 +2170,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
lport->vport = vport;
INIT_LIST_HEAD(&lport->rport_list);
vport->nvmei_support = 1;
+ len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
+ vport->phba->total_nvme_bufs += len;
}
- len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
- vport->phba->total_nvme_bufs += len;
return ret;
}
@@ -2193,6 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
void
lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
{
+#if (IS_ENABLED(CONFIG_NVME_FC))
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2208,7 +2206,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
"6011 Destroying NVME localport %p\n",
localport);
-
list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) {
/* The last node ref has to get released now before the rport
* private memory area is released by the transport.
@@ -2222,6 +2219,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
"6008 rport fail destroy %x\n", ret);
wait_for_completion_timeout(&rport->rport_unreg_done, 5);
}
+
/* lport's rport list is clear. Unregister
* lport and release resources.
*/
@@ -2245,6 +2243,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
"Failed, status x%x\n",
ret);
}
+#endif
}
void
@@ -2275,6 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
int
lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
{
+#if (IS_ENABLED(CONFIG_NVME_FC))
int ret = 0;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
@@ -2348,7 +2348,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR;
rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
-
ret = nvme_fc_register_remoteport(localport, &rpinfo,
&remote_port);
if (!ret) {
@@ -2384,6 +2383,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
ndlp->nlp_type, ndlp->nlp_DID, ndlp);
}
return ret;
+#else
+ return 0;
+#endif
}
/* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport.
@@ -2401,6 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
void
lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
{
+#if (IS_ENABLED(CONFIG_NVME_FC))
int ret;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
@@ -2458,7 +2461,61 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
return;
input_err:
+#endif
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6168: State error: lport %p, rport%p FCID x%06x\n",
vport->localport, ndlp->rport, ndlp->nlp_DID);
}
+
+/**
+ * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvme xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVME aborted xri.
+ **/
+void
+lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri)
+{
+ uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+ uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+ struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
+ struct lpfc_nodelist *ndlp;
+ unsigned long iflag = 0;
+ int rrq_empty = 0;
+
+ if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+ return;
+ spin_lock_irqsave(&phba->hbalock, iflag);
+ spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+ list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
+ &phba->sli4_hba.lpfc_abts_nvme_buf_list,
+ list) {
+ if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
+ list_del(&lpfc_ncmd->list);
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
+ lpfc_ncmd->status = IOSTAT_SUCCESS;
+ spin_unlock(
+ &phba->sli4_hba.abts_nvme_buf_list_lock);
+
+ rrq_empty = list_empty(&phba->active_rrq_list);
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ ndlp = lpfc_ncmd->ndlp;
+ if (ndlp) {
+ lpfc_set_rrq_active(
+ phba, ndlp,
+ lpfc_ncmd->cur_iocbq.sli4_lxritag,
+ rxid, 1);
+ lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+ }
+ lpfc_release_nvme_buf(phba, lpfc_ncmd);
+ if (rrq_empty)
+ lpfc_worker_wake_up(phba);
+ return;
+ }
+ }
+ spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index b2fae5e813f8..1347deb8dd6c 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -57,6 +57,7 @@ struct lpfc_nvme_buf {
struct list_head list;
struct nvmefc_fcp_req *nvmeCmd;
struct lpfc_nvme_rport *nrport;
+ struct lpfc_nodelist *ndlp;
uint32_t timeout;
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index c421e1738ee9..7ca868f394da 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -571,6 +571,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6102 Bad state IO x%x aborted\n",
ctxp->oxid);
+ rc = -ENXIO;
goto aerr;
}
@@ -580,6 +581,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6152 FCP Drop IO x%x: Prep\n",
ctxp->oxid);
+ rc = -ENXIO;
goto aerr;
}
@@ -618,8 +620,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
ctxp->wqeq->hba_wqidx = 0;
nvmewqeq->context2 = NULL;
nvmewqeq->context3 = NULL;
+ rc = -EBUSY;
aerr:
- return -ENXIO;
+ return rc;
}
static void
@@ -668,9 +671,13 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
&phba->pcidev->dev,
&phba->targetport);
+#else
+ error = -ENOMEM;
+#endif
if (error) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
"6025 Cannot register NVME targetport "
@@ -731,9 +738,25 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba)
return 0;
}
+/**
+ * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvmet xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVMET aborted xri.
+ **/
+void
+lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri)
+{
+ /* TODO: work in progress */
+}
+
void
lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_nvmet_tgtport *tgtp;
if (phba->nvmet_support == 0)
@@ -745,6 +768,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
}
phba->targetport = NULL;
+#endif
}
/**
@@ -764,6 +788,7 @@ static void
lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
struct hbq_dmabuf *nvmebuf)
{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -844,6 +869,7 @@ dropit:
atomic_inc(&tgtp->xmt_ls_abort);
lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
+#endif
}
/**
@@ -865,6 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
struct rqb_dmabuf *nvmebuf,
uint64_t isr_timestamp)
{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_nvmet_rcv_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
@@ -955,7 +982,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
atomic_inc(&tgtp->rcv_fcp_cmd_drop);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n",
+ "6159 FCP Drop IO x%x: err x%x\n",
ctxp->oxid, rc);
dropit:
lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
@@ -970,6 +997,7 @@ dropit:
/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
}
+#endif
}
/**
@@ -1114,7 +1142,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba,
bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0);
bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1);
bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0);
- bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL);
+ bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP);
bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME);
/* Word 6 */
@@ -1445,7 +1473,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
case NVMET_FCOP_RSP:
/* Words 0 - 2 */
- sgel = &rsp->sg[0];
physaddr = rsp->rspdma;
wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen;
@@ -1681,8 +1708,8 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
struct lpfc_nodelist *ndlp;
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
- "6067 %s: Entrypoint: sid %x xri %x\n", __func__,
- sid, xri);
+ "6067 Abort: sid %x xri x%x/x%x\n",
+ sid, xri, ctxp->wqeq->sli4_xritag);
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
@@ -1693,7 +1720,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
"6134 Drop ABTS - wrong NDLP state x%x.\n",
- ndlp->nlp_state);
+ (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
/* No failure to an ABTS request. */
return 0;
@@ -1791,7 +1818,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
"6160 Drop ABTS - wrong NDLP state x%x.\n",
- ndlp->nlp_state);
+ (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
/* No failure to an ABTS request. */
return 0;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 9d6384af9fce..54fd0c81ceaf 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5953,12 +5953,13 @@ struct scsi_host_template lpfc_template_nvme = {
.track_queue_depth = 0,
};
-struct scsi_host_template lpfc_template_s3 = {
+struct scsi_host_template lpfc_template_no_hr = {
.module = THIS_MODULE,
.name = LPFC_DRIVER_NAME,
.proc_name = LPFC_DRIVER_NAME,
.info = lpfc_info,
.queuecommand = lpfc_queuecommand,
+ .eh_timed_out = fc_eh_timed_out,
.eh_abort_handler = lpfc_abort_handler,
.eh_device_reset_handler = lpfc_device_reset_handler,
.eh_target_reset_handler = lpfc_target_reset_handler,
@@ -6015,7 +6016,6 @@ struct scsi_host_template lpfc_vport_template = {
.eh_abort_handler = lpfc_abort_handler,
.eh_device_reset_handler = lpfc_device_reset_handler,
.eh_target_reset_handler = lpfc_target_reset_handler,
- .eh_bus_reset_handler = lpfc_bus_reset_handler,
.slave_alloc = lpfc_slave_alloc,
.slave_configure = lpfc_slave_configure,
.slave_destroy = lpfc_slave_destroy,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index e43e5e23c24b..1c9fa45df7eb 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,3 +1,4 @@
+
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
@@ -952,7 +953,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
start_sglq = sglq;
while (!found) {
if (!sglq)
- return NULL;
+ break;
if (ndlp && ndlp->active_rrqs_xri_bitmap &&
test_bit(sglq->sli4_lxritag,
ndlp->active_rrqs_xri_bitmap)) {
@@ -12213,6 +12214,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
}
/**
+ * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 NVME abort XRI events.
+ **/
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+ struct lpfc_cq_event *cq_event;
+
+ /* First, declare the nvme xri abort event has been handled */
+ spin_lock_irq(&phba->hbalock);
+ phba->hba_flag &= ~NVME_XRI_ABORT_EVENT;
+ spin_unlock_irq(&phba->hbalock);
+ /* Now, handle all the nvme xri abort events */
+ while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) {
+ /* Get the first event from the head of the event queue */
+ spin_lock_irq(&phba->hbalock);
+ list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+ cq_event, struct lpfc_cq_event, list);
+ spin_unlock_irq(&phba->hbalock);
+ /* Notify aborted XRI for NVME work queue */
+ if (phba->nvmet_support) {
+ lpfc_sli4_nvmet_xri_aborted(phba,
+ &cq_event->cqe.wcqe_axri);
+ } else {
+ lpfc_sli4_nvme_xri_aborted(phba,
+ &cq_event->cqe.wcqe_axri);
+ }
+ /* Free the event processed back to the free pool */
+ lpfc_sli4_cq_event_release(phba, cq_event);
+ }
+}
+
+/**
* lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
* @phba: pointer to lpfc hba data structure.
*
@@ -12709,10 +12745,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
spin_unlock_irqrestore(&phba->hbalock, iflags);
workposted = true;
break;
+ case LPFC_NVME:
+ spin_lock_irqsave(&phba->hbalock, iflags);
+ list_add_tail(&cq_event->list,
+ &phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
+ /* Set the nvme xri abort event flag */
+ phba->hba_flag |= NVME_XRI_ABORT_EVENT;
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ workposted = true;
+ break;
default:
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0603 Invalid work queue CQE subtype (x%x)\n",
- cq->subtype);
+ "0603 Invalid CQ subtype %d: "
+ "%08x %08x %08x %08x\n",
+ cq->subtype, wcqe->word0, wcqe->parameter,
+ wcqe->word2, wcqe->word3);
+ lpfc_sli4_cq_event_release(phba, cq_event);
workposted = false;
break;
}
@@ -13827,6 +13875,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
* @startq: The starting FCP EQ to modify
*
* This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
+ * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ IDs to be
+ * updated in one mailbox command.
*
* The @phba struct is used to send mailbox command to HBA. The @startq
* is used to get the starting FCP EQ to change.
@@ -13879,7 +13929,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq)
eq_delay->u.request.eq[cnt].phase = 0;
eq_delay->u.request.eq[cnt].delay_multi = dmult;
cnt++;
- if (cnt >= LPFC_MAX_EQ_DELAY)
+ if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT)
break;
}
eq_delay->u.request.num_eq = cnt;
@@ -15185,17 +15235,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
drq = drqp[idx];
cq = cqp[idx];
- if (hrq->entry_count != drq->entry_count) {
- status = -EINVAL;
- goto out;
- }
-
/* sanity check on queue memory */
if (!hrq || !drq || !cq) {
status = -ENODEV;
goto out;
}
+ if (hrq->entry_count != drq->entry_count) {
+ status = -EINVAL;
+ goto out;
+ }
+
if (idx == 0) {
bf_set(lpfc_mbx_rq_create_num_pages,
&rq_create->u.request,
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 91153c9f6d18..710458cf11d6 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -642,6 +642,7 @@ struct lpfc_sli4_hba {
struct list_head sp_asynce_work_queue;
struct list_head sp_fcp_xri_aborted_work_queue;
struct list_head sp_els_xri_aborted_work_queue;
+ struct list_head sp_nvme_xri_aborted_work_queue;
struct list_head sp_unsol_work_queue;
struct lpfc_sli4_link link_state;
struct lpfc_sli4_lnk_info lnk_info;
@@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba);
void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri);
+void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri);
void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
struct sli4_wcqe_xri_aborted *);
void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 86c6c9b26b82..d4e95e28f4e3 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "11.2.0.7"
+#define LPFC_DRIVER_VERSION "11.2.0.10"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index e7e5974e1a2c..2b209bbb4c91 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -35,8 +35,8 @@
/*
* MegaRAID SAS Driver meta data
*/
-#define MEGASAS_VERSION "07.701.16.00-rc1"
-#define MEGASAS_RELDATE "February 2, 2017"
+#define MEGASAS_VERSION "07.701.17.00-rc1"
+#define MEGASAS_RELDATE "March 2, 2017"
/*
* Device IDs
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 7ac9a9ee9bd4..0016f12cc563 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1963,6 +1963,9 @@ scan_target:
if (!mr_device_priv_data)
return -ENOMEM;
sdev->hostdata = mr_device_priv_data;
+
+ atomic_set(&mr_device_priv_data->r1_ldio_hint,
+ instance->r1_ldio_hint_default);
return 0;
}
@@ -5034,10 +5037,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe)
&instance->irq_context[j]);
/* Retry irq register for IO_APIC*/
instance->msix_vectors = 0;
- if (is_probe)
+ if (is_probe) {
+ pci_free_irq_vectors(instance->pdev);
return megasas_setup_irqs_ioapic(instance);
- else
+ } else {
return -1;
+ }
}
}
return 0;
@@ -5277,9 +5282,11 @@ static int megasas_init_fw(struct megasas_instance *instance)
MPI2_REPLY_POST_HOST_INDEX_OFFSET);
}
- i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
- if (i < 0)
- goto fail_setup_irqs;
+ if (!instance->msix_vectors) {
+ i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
+ if (i < 0)
+ goto fail_setup_irqs;
+ }
dev_info(&instance->pdev->dev,
"firmware supports msix\t: (%d)", fw_msix_count);
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 29650ba669da..f990ab4d45e1 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context,
cpu_sel = MR_RAID_CTX_CPUSEL_1;
if (is_stream_detected(rctx_g35) &&
- (raid->level == 5) &&
+ ((raid->level == 5) || (raid->level == 6)) &&
(raid->writeMode == MR_RL_WRITE_THROUGH_MODE) &&
(cpu_sel == MR_RAID_CTX_CPUSEL_FCFS))
cpu_sel = MR_RAID_CTX_CPUSEL_0;
@@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
fp_possible = false;
atomic_dec(&instance->fw_outstanding);
} else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) ||
- atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) {
+ (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) {
fp_possible = false;
atomic_dec(&instance->fw_outstanding);
if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 7fe7e6ed595b..8981806fb13f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc,
u64 sas_address, u16 handle, u8 phy_number, u8 link_rate);
extern struct sas_function_template mpt3sas_transport_functions;
extern struct scsi_transport_template *mpt3sas_transport_template;
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
- enum scsi_device_state new_state);
/* trigger data externs */
void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc,
struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 46e866c36c8a..919ba2bb15f1 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev,
sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 1;
- r = scsi_internal_device_block(sdev);
+ r = scsi_internal_device_block(sdev, false);
if (r == -EINVAL)
sdev_printk(KERN_WARNING, sdev,
"device_block failed with return(%d) for handle(0x%04x)\n",
@@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
"performing a block followed by an unblock\n",
r, sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 1;
- r = scsi_internal_device_block(sdev);
+ r = scsi_internal_device_block(sdev, false);
if (r)
sdev_printk(KERN_WARNING, sdev, "retried device_block "
"failed with return(%d) for handle(0x%04x)\n",
@@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
struct MPT3SAS_DEVICE *sas_device_priv_data;
u32 response_code = 0;
unsigned long flags;
- unsigned int sector_sz;
mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
@@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
}
xfer_cnt = le32_to_cpu(mpi_reply->TransferCount);
-
- /* In case of bogus fw or device, we could end up having
- * unaligned partial completion. We can force alignment here,
- * then scsi-ml does not need to handle this misbehavior.
- */
- sector_sz = scmd->device->sector_size;
- if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz &&
- xfer_cnt % sector_sz)) {
- sdev_printk(KERN_INFO, scmd->device,
- "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
- xfer_cnt, sector_sz);
- xfer_cnt = round_down(xfer_cnt, sector_sz);
- }
-
scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt);
if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
log_info = le32_to_cpu(mpi_reply->IOCLogInfo);
diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h
index 23bd70628a2f..7d173f48a81e 100644
--- a/drivers/scsi/qedf/qedf_dbg.h
+++ b/drivers/scsi/qedf/qedf_dbg.h
@@ -81,14 +81,17 @@ struct qedf_dbg_ctx {
#define QEDF_INFO(pdev, level, fmt, ...) \
qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \
## __VA_ARGS__)
-
-extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
const char *fmt, ...);
-extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
const char *, ...);
-extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
+__printf(4, 5)
+void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
u32 line, const char *, ...);
-extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(5, 6)
+void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
u32 info, const char *fmt, ...);
/* GRC Dump related defines */
diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c
index 868d423380d1..ed58b9104f58 100644
--- a/drivers/scsi/qedf/qedf_fip.c
+++ b/drivers/scsi/qedf/qedf_fip.c
@@ -203,7 +203,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb)
case FIP_DT_MAC:
mp = (struct fip_mac_desc *)desc;
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2,
- "fd_mac=%pM.\n", __func__, mp->fd_mac);
+ "fd_mac=%pM\n", mp->fd_mac);
ether_addr_copy(cvl_mac, mp->fd_mac);
break;
case FIP_DT_NAME:
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index ee0dcf9d3aba..46debe5034af 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
} else {
refcount = kref_read(&io_req->refcount);
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO,
- "%d:0:%d:%d xid=0x%0x op=0x%02x "
+ "%d:0:%d:%lld xid=0x%0x op=0x%02x "
"lba=%02x%02x%02x%02x cdb_status=%d "
"fcp_resid=0x%x refcount=%d.\n",
qedf->lport->host->host_no, sc_cmd->device->id,
@@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
sc_cmd->result = result << 16;
refcount = kref_read(&io_req->refcount);
- QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing "
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing "
"sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, "
"allowed=%d retries=%d refcount=%d.\n",
qedf->lport->host->host_no, sc_cmd->device->id,
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index d9d7a86b5f8b..8e2a160490e6 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf)
}
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
- "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl,
- qedf->bdq_pbl_dma);
+ "BDQ PBL addr=0x%p dma=%pad\n",
+ qedf->bdq_pbl, &qedf->bdq_pbl_dma);
/*
* Populate BDQ PBL with physical and virtual address of individual
diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c
index 955936274241..59417199bf36 100644
--- a/drivers/scsi/qedi/qedi_debugfs.c
+++ b/drivers/scsi/qedi/qedi_debugfs.c
@@ -14,7 +14,7 @@
#include <linux/debugfs.h>
#include <linux/module.h>
-int do_not_recover;
+int qedi_do_not_recover;
static struct dentry *qedi_dbg_root;
void
@@ -74,22 +74,22 @@ qedi_dbg_exit(void)
static ssize_t
qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg)
{
- if (!do_not_recover)
- do_not_recover = 1;
+ if (!qedi_do_not_recover)
+ qedi_do_not_recover = 1;
QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
- do_not_recover);
+ qedi_do_not_recover);
return 0;
}
static ssize_t
qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg)
{
- if (do_not_recover)
- do_not_recover = 0;
+ if (qedi_do_not_recover)
+ qedi_do_not_recover = 0;
QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
- do_not_recover);
+ qedi_do_not_recover);
return 0;
}
@@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer,
if (*ppos)
return 0;
- cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover);
+ cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover);
cnt = min_t(int, count, cnt - *ppos);
*ppos += cnt;
return cnt;
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index c9f0ef4e11b3..2bce3efc66a4 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work)
get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id,
qedi_conn->iscsi_conn_id);
- if (do_not_recover) {
+ if (qedi_do_not_recover) {
QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
- do_not_recover);
+ qedi_do_not_recover);
goto abort_ret;
}
diff --git a/drivers/scsi/qedi/qedi_gbl.h b/drivers/scsi/qedi/qedi_gbl.h
index 8e488de88ece..63d793f46064 100644
--- a/drivers/scsi/qedi/qedi_gbl.h
+++ b/drivers/scsi/qedi/qedi_gbl.h
@@ -12,8 +12,14 @@
#include "qedi_iscsi.h"
+#ifdef CONFIG_DEBUG_FS
+extern int qedi_do_not_recover;
+#else
+#define qedi_do_not_recover (0)
+#endif
+
extern uint qedi_io_tracing;
-extern int do_not_recover;
+
extern struct scsi_host_template qedi_host_template;
extern struct iscsi_transport qedi_iscsi_transport;
extern const struct qed_iscsi_ops *qedi_ops;
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index b9f79d36142d..4cc474364c50 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
return ERR_PTR(ret);
}
- if (do_not_recover) {
+ if (qedi_do_not_recover) {
ret = -ENOMEM;
return ERR_PTR(ret);
}
@@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
struct qedi_endpoint *qedi_ep;
int ret = 0;
- if (do_not_recover)
+ if (qedi_do_not_recover)
return 1;
qedi_ep = ep->dd_data;
@@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
}
if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
- if (do_not_recover) {
+ if (qedi_do_not_recover) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Do not recover cid=0x%x\n",
qedi_ep->iscsi_cid);
@@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
}
}
- if (do_not_recover)
+ if (qedi_do_not_recover)
goto ep_exit_recover;
switch (qedi_ep->state) {
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 5eda21d903e9..8e3d92807cb8 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
*/
qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params);
- qedi_setup_int(qedi);
+ rc = qedi_setup_int(qedi);
if (rc)
goto stop_iscsi_func;
diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig
index 67c0d5aa3212..de952935b5d2 100644
--- a/drivers/scsi/qla2xxx/Kconfig
+++ b/drivers/scsi/qla2xxx/Kconfig
@@ -3,6 +3,7 @@ config SCSI_QLA_FC
depends on PCI && SCSI
depends on SCSI_FC_ATTRS
select FW_LOADER
+ select BTREE
---help---
This qla2xxx driver supports all QLogic Fibre Channel
PCI and PCIe host adapters.
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index f610103994af..435ff7fd6384 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
"Timer for the VP[%d] has stopped\n", vha->vp_idx);
}
- BUG_ON(atomic_read(&vha->vref_count));
-
qla2x00_free_fcports(vha);
mutex_lock(&ha->vport_lock);
@@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l,
vha->gnl.ldma);
- if (vha->qpair->vp_idx == vha->vp_idx) {
+ if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) {
if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS)
ql_log(ql_log_warn, vha, 0x7087,
"Queue Pair delete failed.\n");
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 21d9fb7fc887..51b4179469d1 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id,
"%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size);
ql_dbg(level, vha, id,
"----- -----------------------------------------------\n");
- for (cnt = 0; cnt < size; cnt++, buf++) {
- if (cnt % 16 == 0)
- ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU);
- printk(" %02x", *buf);
- if (cnt % 16 == 15)
- printk("\n");
+ for (cnt = 0; cnt < size; cnt += 16) {
+ ql_dbg(level, vha, id, "%04x: ", cnt);
+ print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1,
+ buf + cnt, min(16U, size - cnt), false);
}
- if (cnt % 16 != 0)
- printk("\n");
}
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index e1fc4e66966a..c6bffe929fe7 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...);
#define ql_dbg_tgt 0x00004000 /* Target mode */
#define ql_dbg_tgt_mgt 0x00002000 /* Target mode management */
#define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */
+#define ql_dbg_tgt_dif 0x00000800 /* Target mode dif */
extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
uint32_t, void **);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 625d438e3cce..ae119018dfaa 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -25,6 +25,7 @@
#include <linux/firmware.h>
#include <linux/aer.h>
#include <linux/mutex.h>
+#include <linux/btree.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
@@ -395,11 +396,15 @@ struct srb_iocb {
struct completion comp;
} abt;
struct ct_arg ctarg;
+#define MAX_IOCB_MB_REG 28
+#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t))
struct {
- __le16 in_mb[28]; /* fr fw */
- __le16 out_mb[28]; /* to fw */
+ __le16 in_mb[MAX_IOCB_MB_REG]; /* from FW */
+ __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */
void *out, *in;
dma_addr_t out_dma, in_dma;
+ struct completion comp;
+ int rc;
} mbx;
struct {
struct imm_ntfy_from_isp *ntfy;
@@ -437,7 +442,7 @@ typedef struct srb {
uint32_t handle;
uint16_t flags;
uint16_t type;
- char *name;
+ const char *name;
int iocbs;
struct qla_qpair *qpair;
u32 gen1; /* scratch */
@@ -2300,6 +2305,8 @@ typedef struct fc_port {
struct ct_sns_desc ct_desc;
enum discovery_state disc_state;
enum login_state fw_login_state;
+ unsigned long plogi_nack_done_deadline;
+
u32 login_gen, last_login_gen;
u32 rscn_gen, last_rscn_gen;
u32 chip_reset;
@@ -3106,6 +3113,16 @@ struct qla_chip_state_84xx {
uint32_t gold_fw_version;
};
+struct qla_dif_statistics {
+ uint64_t dif_input_bytes;
+ uint64_t dif_output_bytes;
+ uint64_t dif_input_requests;
+ uint64_t dif_output_requests;
+ uint32_t dif_guard_err;
+ uint32_t dif_ref_tag_err;
+ uint32_t dif_app_tag_err;
+};
+
struct qla_statistics {
uint32_t total_isp_aborts;
uint64_t input_bytes;
@@ -3118,6 +3135,8 @@ struct qla_statistics {
uint32_t stat_max_pend_cmds;
uint32_t stat_max_qfull_cmds_alloc;
uint32_t stat_max_qfull_cmds_dropped;
+
+ struct qla_dif_statistics qla_dif_stats;
};
struct bidi_statistics {
@@ -3125,6 +3144,16 @@ struct bidi_statistics {
unsigned long long transfer_bytes;
};
+struct qla_tc_param {
+ struct scsi_qla_host *vha;
+ uint32_t blk_sz;
+ uint32_t bufflen;
+ struct scatterlist *sg;
+ struct scatterlist *prot_sg;
+ struct crc_context *ctx;
+ uint8_t *ctx_dsd_alloced;
+};
+
/* Multi queue support */
#define MBC_INITIALIZE_MULTIQ 0x1f
#define QLA_QUE_PAGE 0X1000
@@ -3272,6 +3301,8 @@ struct qlt_hw_data {
uint8_t tgt_node_name[WWN_SIZE];
struct dentry *dfs_tgt_sess;
+ struct dentry *dfs_tgt_port_database;
+
struct list_head q_full_list;
uint32_t num_pend_cmds;
uint32_t num_qfull_cmds_alloc;
@@ -3281,6 +3312,7 @@ struct qlt_hw_data {
spinlock_t sess_lock;
int rspq_vector_cpuid;
spinlock_t atio_lock ____cacheline_aligned;
+ struct btree_head32 host_map;
};
#define MAX_QFULL_CMDS_ALLOC 8192
@@ -3290,6 +3322,10 @@ struct qlt_hw_data {
#define LEAK_EXCHG_THRESH_HOLD_PERCENT 75 /* 75 percent */
+#define QLA_EARLY_LINKUP(_ha) \
+ ((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \
+ _ha->flags.fw_started && !_ha->flags.fw_init_done)
+
/*
* Qlogic host adapter specific data structure.
*/
@@ -3339,7 +3375,11 @@ struct qla_hw_data {
uint32_t fawwpn_enabled:1;
uint32_t exlogins_enabled:1;
uint32_t exchoffld_enabled:1;
- /* 35 bits */
+
+ uint32_t lip_ae:1;
+ uint32_t n2n_ae:1;
+ uint32_t fw_started:1;
+ uint32_t fw_init_done:1;
} flags;
/* This spinlock is used to protect "io transactions", you must
@@ -3432,7 +3472,6 @@ struct qla_hw_data {
#define P2P_LOOP 3
uint8_t interrupts_on;
uint32_t isp_abort_cnt;
-
#define PCI_DEVICE_ID_QLOGIC_ISP2532 0x2532
#define PCI_DEVICE_ID_QLOGIC_ISP8432 0x8432
#define PCI_DEVICE_ID_QLOGIC_ISP8001 0x8001
@@ -3913,6 +3952,7 @@ typedef struct scsi_qla_host {
struct list_head vp_fcports; /* list of fcports */
struct list_head work_list;
spinlock_t work_lock;
+ struct work_struct iocb_work;
/* Commonly used flags and state information. */
struct Scsi_Host *host;
@@ -4076,6 +4116,7 @@ typedef struct scsi_qla_host {
/* Count of active session/fcport */
int fcport_count;
wait_queue_head_t fcport_waitQ;
+ wait_queue_head_t vref_waitq;
} scsi_qla_host_t;
struct qla27xx_image_status {
@@ -4131,14 +4172,17 @@ struct qla2_sgx {
mb(); \
if (__vha->flags.delete_progress) { \
atomic_dec(&__vha->vref_count); \
+ wake_up(&__vha->vref_waitq); \
__bail = 1; \
} else { \
__bail = 0; \
} \
} while (0)
-#define QLA_VHA_MARK_NOT_BUSY(__vha) \
+#define QLA_VHA_MARK_NOT_BUSY(__vha) do { \
atomic_dec(&__vha->vref_count); \
+ wake_up(&__vha->vref_waitq); \
+} while (0) \
#define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do { \
atomic_inc(&__qpair->ref_count); \
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index b48cce696bac..989e17b0758c 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused)
struct qla_hw_data *ha = vha->hw;
unsigned long flags;
struct fc_port *sess = NULL;
- struct qla_tgt *tgt= vha->vha_tgt.qla_tgt;
+ struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
- seq_printf(s, "%s\n",vha->host_str);
+ seq_printf(s, "%s\n", vha->host_str);
if (tgt) {
- seq_printf(s, "Port ID Port Name Handle\n");
+ seq_puts(s, "Port ID Port Name Handle\n");
spin_lock_irqsave(&ha->tgt.sess_lock, flags);
list_for_each_entry(sess, &vha->vp_fcports, list)
@@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file)
return single_open(file, qla2x00_dfs_tgt_sess_show, vha);
}
-
static const struct file_operations dfs_tgt_sess_ops = {
.open = qla2x00_dfs_tgt_sess_open,
.read = seq_read,
@@ -53,6 +52,78 @@ static const struct file_operations dfs_tgt_sess_ops = {
};
static int
+qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
+{
+ scsi_qla_host_t *vha = s->private;
+ struct qla_hw_data *ha = vha->hw;
+ struct gid_list_info *gid_list;
+ dma_addr_t gid_list_dma;
+ fc_port_t fc_port;
+ char *id_iter;
+ int rc, i;
+ uint16_t entries, loop_id;
+ struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+
+ seq_printf(s, "%s\n", vha->host_str);
+ if (tgt) {
+ gid_list = dma_alloc_coherent(&ha->pdev->dev,
+ qla2x00_gid_list_size(ha),
+ &gid_list_dma, GFP_KERNEL);
+ if (!gid_list) {
+ ql_dbg(ql_dbg_user, vha, 0x705c,
+ "DMA allocation failed for %u\n",
+ qla2x00_gid_list_size(ha));
+ return 0;
+ }
+
+ rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma,
+ &entries);
+ if (rc != QLA_SUCCESS)
+ goto out_free_id_list;
+
+ id_iter = (char *)gid_list;
+
+ seq_puts(s, "Port Name Port ID Loop ID\n");
+
+ for (i = 0; i < entries; i++) {
+ struct gid_list_info *gid =
+ (struct gid_list_info *)id_iter;
+ loop_id = le16_to_cpu(gid->loop_id);
+ memset(&fc_port, 0, sizeof(fc_port_t));
+
+ fc_port.loop_id = loop_id;
+
+ rc = qla24xx_gpdb_wait(vha, &fc_port, 0);
+ seq_printf(s, "%8phC %02x%02x%02x %d\n",
+ fc_port.port_name, fc_port.d_id.b.domain,
+ fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
+ fc_port.loop_id);
+ id_iter += ha->gid_list_info_size;
+ }
+out_free_id_list:
+ dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
+ gid_list, gid_list_dma);
+ }
+
+ return 0;
+}
+
+static int
+qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file)
+{
+ scsi_qla_host_t *vha = inode->i_private;
+
+ return single_open(file, qla2x00_dfs_tgt_port_database_show, vha);
+}
+
+static const struct file_operations dfs_tgt_port_database_ops = {
+ .open = qla2x00_dfs_tgt_port_database_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int
qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
{
struct scsi_qla_host *vha = s->private;
@@ -114,6 +185,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused)
seq_printf(s, "num Q full sent = %lld\n",
vha->tgt_counters.num_q_full_sent);
+ /* DIF stats */
+ seq_printf(s, "DIF Inp Bytes = %lld\n",
+ vha->qla_stats.qla_dif_stats.dif_input_bytes);
+ seq_printf(s, "DIF Outp Bytes = %lld\n",
+ vha->qla_stats.qla_dif_stats.dif_output_bytes);
+ seq_printf(s, "DIF Inp Req = %lld\n",
+ vha->qla_stats.qla_dif_stats.dif_input_requests);
+ seq_printf(s, "DIF Outp Req = %lld\n",
+ vha->qla_stats.qla_dif_stats.dif_output_requests);
+ seq_printf(s, "DIF Guard err = %d\n",
+ vha->qla_stats.qla_dif_stats.dif_guard_err);
+ seq_printf(s, "DIF Ref tag err = %d\n",
+ vha->qla_stats.qla_dif_stats.dif_ref_tag_err);
+ seq_printf(s, "DIF App tag err = %d\n",
+ vha->qla_stats.qla_dif_stats.dif_app_tag_err);
return 0;
}
@@ -281,6 +367,14 @@ create_nodes:
goto out;
}
+ ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database",
+ S_IRUSR, ha->dfs_dir, vha, &dfs_tgt_port_database_ops);
+ if (!ha->tgt.dfs_tgt_port_database) {
+ ql_log(ql_log_warn, vha, 0xffff,
+ "Unable to create debugFS tgt_port_database node.\n");
+ goto out;
+ }
+
ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha,
&dfs_fce_ops);
if (!ha->dfs_fce) {
@@ -311,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha)
ha->tgt.dfs_tgt_sess = NULL;
}
+ if (ha->tgt.dfs_tgt_port_database) {
+ debugfs_remove(ha->tgt.dfs_tgt_port_database);
+ ha->tgt.dfs_tgt_port_database = NULL;
+ }
+
if (ha->dfs_fw_resource_cnt) {
debugfs_remove(ha->dfs_fw_resource_cnt);
ha->dfs_fw_resource_cnt = NULL;
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index b3d6441d1d90..5b2451745e9f 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *);
void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
uint16_t *);
int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *);
+int qla24xx_async_abort_cmd(srb_t *);
/*
* Global Functions in qla_mid.c source file.
@@ -256,11 +257,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *);
extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *);
extern int qla2x00_issue_marker(scsi_qla_host_t *, int);
extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *,
- uint32_t *, uint16_t, struct qla_tgt_cmd *);
+ uint32_t *, uint16_t, struct qla_tc_param *);
extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *,
- uint32_t *, uint16_t, struct qla_tgt_cmd *);
+ uint32_t *, uint16_t, struct qla_tc_param *);
extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
- uint32_t *, uint16_t, struct qla_tgt_cmd *);
+ uint32_t *, uint16_t, struct qla_tc_param *);
extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *,
@@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
extern int
qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *,
- dma_addr_t, uint);
+ dma_addr_t, uint16_t);
extern int qla24xx_abort_command(srb_t *);
extern int qla24xx_async_abort_command(srb_t *);
@@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t);
extern int
qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint);
+int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *);
+int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8);
+int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t,
+ uint16_t *);
+int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *,
+ struct port_database_24xx *);
+
/*
* Global Function Prototypes in qla_isr.c source file.
*/
@@ -846,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *,
uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **);
void qla24xx_delete_sess_fn(struct work_struct *);
void qlt_unknown_atio_work_fn(struct work_struct *);
+void qlt_update_host_map(struct scsi_qla_host *, port_id_t);
+void qlt_remove_target_resources(struct qla_hw_data *);
#endif /* _QLA_GBL_H */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 32fb9007f137..f9d2fe7b1ade 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
struct srb *sp = s;
struct scsi_qla_host *vha = sp->vha;
struct qla_hw_data *ha = vha->hw;
- uint64_t zero = 0;
struct port_database_24xx *pd;
fc_port_t *fcport = sp->fcport;
u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb;
@@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in;
- /* Check for logged in state. */
- if (pd->current_login_state != PDS_PRLI_COMPLETE &&
- pd->last_login_state != PDS_PRLI_COMPLETE) {
- ql_dbg(ql_dbg_mbx, vha, 0xffff,
- "Unable to verify login-state (%x/%x) for "
- "loop_id %x.\n", pd->current_login_state,
- pd->last_login_state, fcport->loop_id);
- rval = QLA_FUNCTION_FAILED;
- goto gpd_error_out;
- }
-
- if (fcport->loop_id == FC_NO_LOOP_ID ||
- (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
- memcmp(fcport->port_name, pd->port_name, 8))) {
- /* We lost the device mid way. */
- rval = QLA_NOT_LOGGED_IN;
- goto gpd_error_out;
- }
-
- /* Names are little-endian. */
- memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
-
- /* Get port_id of device. */
- fcport->d_id.b.domain = pd->port_id[0];
- fcport->d_id.b.area = pd->port_id[1];
- fcport->d_id.b.al_pa = pd->port_id[2];
- fcport->d_id.b.rsvd_1 = 0;
-
- /* If not target must be initiator or unknown type. */
- if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
- fcport->port_type = FCT_INITIATOR;
- else
- fcport->port_type = FCT_TARGET;
-
- /* Passback COS information. */
- fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
- FC_COS_CLASS2 : FC_COS_CLASS3;
-
- if (pd->prli_svc_param_word_3[0] & BIT_7) {
- fcport->flags |= FCF_CONF_COMP_SUPPORTED;
- fcport->conf_compl_supported = 1;
- }
+ rval = __qla24xx_parse_gpdb(vha, fcport, pd);
gpd_error_out:
memset(&ea, 0, sizeof(ea));
@@ -876,10 +834,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
fcport->login_retry--;
if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
- (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
(fcport->fw_login_state == DSC_LS_PRLI_PEND))
return 0;
+ if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+ if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+ return 0;
+ }
+
/* for pure Target Mode. Login will not be initiated */
if (vha->host->active_mode == MODE_TARGET)
return 0;
@@ -1041,10 +1003,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
fcport->flags);
if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
- (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
(fcport->fw_login_state == DSC_LS_PRLI_PEND))
return;
+ if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+ if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+ return;
+ }
+
if (fcport->flags & FCF_ASYNC_SENT) {
fcport->login_retry++;
set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
@@ -1258,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res)
complete(&abt->u.abt.comp);
}
-static int
+int
qla24xx_async_abort_cmd(srb_t *cmd_sp)
{
scsi_qla_host_t *vha = cmd_sp->vha;
@@ -3212,6 +3178,7 @@ next_check:
} else {
ql_dbg(ql_dbg_init, vha, 0x00d3,
"Init Firmware -- success.\n");
+ ha->flags.fw_started = 1;
}
return (rval);
@@ -3374,8 +3341,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
uint8_t domain;
char connect_type[22];
struct qla_hw_data *ha = vha->hw;
- unsigned long flags;
scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+ port_id_t id;
/* Get host addresses. */
rval = qla2x00_get_adapter_id(vha,
@@ -3453,13 +3420,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
/* Save Host port and loop ID. */
/* byte order - Big Endian */
- vha->d_id.b.domain = domain;
- vha->d_id.b.area = area;
- vha->d_id.b.al_pa = al_pa;
-
- spin_lock_irqsave(&ha->vport_slock, flags);
- qlt_update_vp_map(vha, SET_AL_PA);
- spin_unlock_irqrestore(&ha->vport_slock, flags);
+ id.b.domain = domain;
+ id.b.area = area;
+ id.b.al_pa = al_pa;
+ id.b.rsvd_1 = 0;
+ qlt_update_host_map(vha, id);
if (!vha->flags.init_done)
ql_log(ql_log_info, vha, 0x2010,
@@ -4036,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
atomic_set(&vha->loop_state, LOOP_READY);
ql_dbg(ql_dbg_disc, vha, 0x2069,
"LOOP READY.\n");
+ ha->flags.fw_init_done = 1;
/*
* Process any ATIO queue entries that came in
@@ -5148,6 +5114,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha)
}
}
atomic_dec(&vha->vref_count);
+ wake_up(&vha->vref_waitq);
}
spin_unlock_irqrestore(&ha->vport_slock, flags);
}
@@ -5526,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
if (!(IS_P3P_TYPE(ha)))
ha->isp_ops->reset_chip(vha);
+ ha->flags.n2n_ae = 0;
+ ha->flags.lip_ae = 0;
+ ha->current_topology = 0;
+ ha->flags.fw_started = 0;
+ ha->flags.fw_init_done = 0;
ha->chip_reset++;
atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
@@ -6802,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
return;
if (!ha->fw_major_version)
return;
+ if (!ha->flags.fw_started)
+ return;
ret = qla2x00_stop_firmware(vha);
for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT &&
@@ -6815,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
"Attempting retry of stop-firmware command.\n");
ret = qla2x00_stop_firmware(vha);
}
+
+ ha->flags.fw_started = 0;
+ ha->flags.fw_init_done = 0;
}
int
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 535079280288..ea027f6a7fd4 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx,
int
qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
- uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+ uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
{
void *next_dsd;
uint8_t avail_dsds = 0;
@@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
struct scatterlist *sg_prot;
uint32_t *cur_dsd = dsd;
uint16_t used_dsds = tot_dsds;
-
uint32_t prot_int; /* protection interval */
uint32_t partial;
struct qla2_sgx sgx;
@@ -966,7 +965,7 @@ alloc_and_fill:
} else {
list_add_tail(&dsd_ptr->list,
&(tc->ctx->dsd_list));
- tc->ctx_dsd_alloced = 1;
+ *tc->ctx_dsd_alloced = 1;
}
@@ -1005,7 +1004,7 @@ alloc_and_fill:
int
qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
- uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+ uint16_t tot_dsds, struct qla_tc_param *tc)
{
void *next_dsd;
uint8_t avail_dsds = 0;
@@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
} else {
list_add_tail(&dsd_ptr->list,
&(tc->ctx->dsd_list));
- tc->ctx_dsd_alloced = 1;
+ *tc->ctx_dsd_alloced = 1;
}
/* add new list to cmd iocb or last list */
@@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
int
qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
- uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+ uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
{
void *next_dsd;
uint8_t avail_dsds = 0;
@@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
} else {
list_add_tail(&dsd_ptr->list,
&(tc->ctx->dsd_list));
- tc->ctx_dsd_alloced = 1;
+ *tc->ctx_dsd_alloced = 1;
}
/* add new list to cmd iocb or last list */
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 3c66ea29de27..3203367a4f42 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -708,6 +708,8 @@ skip_rio:
"mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx);
ha->isp_ops->fw_dump(vha, 1);
+ ha->flags.fw_init_done = 0;
+ ha->flags.fw_started = 0;
if (IS_FWI2_CAPABLE(ha)) {
if (mb[1] == 0 && mb[2] == 0) {
@@ -761,6 +763,9 @@ skip_rio:
break;
case MBA_LIP_OCCURRED: /* Loop Initialization Procedure */
+ ha->flags.lip_ae = 1;
+ ha->flags.n2n_ae = 0;
+
ql_dbg(ql_dbg_async, vha, 0x5009,
"LIP occurred (%x).\n", mb[1]);
@@ -797,6 +802,10 @@ skip_rio:
break;
case MBA_LOOP_DOWN: /* Loop Down Event */
+ ha->flags.n2n_ae = 0;
+ ha->flags.lip_ae = 0;
+ ha->current_topology = 0;
+
mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha))
? RD_REG_WORD(&reg24->mailbox4) : 0;
mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(&reg82->mailbox_out[4])
@@ -866,6 +875,9 @@ skip_rio:
/* case MBA_DCBX_COMPLETE: */
case MBA_POINT_TO_POINT: /* Point-to-Point */
+ ha->flags.lip_ae = 0;
+ ha->flags.n2n_ae = 1;
+
if (IS_QLA2100(ha))
break;
@@ -1620,9 +1632,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
QLA_LOGIO_LOGIN_RETRIED : 0;
if (logio->entry_status) {
ql_log(ql_log_warn, fcport->vha, 0x5034,
- "Async-%s error entry - hdl=%x"
+ "Async-%s error entry - %8phC hdl=%x"
"portid=%02x%02x%02x entry-status=%x.\n",
- type, sp->handle, fcport->d_id.b.domain,
+ type, fcport->port_name, sp->handle, fcport->d_id.b.domain,
fcport->d_id.b.area, fcport->d_id.b.al_pa,
logio->entry_status);
ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d,
@@ -1633,8 +1645,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) {
ql_dbg(ql_dbg_async, fcport->vha, 0x5036,
- "Async-%s complete - hdl=%x portid=%02x%02x%02x "
- "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+ "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x "
+ "iop0=%x.\n", type, fcport->port_name, sp->handle,
+ fcport->d_id.b.domain,
fcport->d_id.b.area, fcport->d_id.b.al_pa,
le32_to_cpu(logio->io_parameter[0]));
@@ -1674,6 +1687,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
case LSC_SCODE_NPORT_USED:
data[0] = MBS_LOOP_ID_USED;
break;
+ case LSC_SCODE_CMD_FAILED:
+ if (iop[1] == 0x0606) {
+ /*
+ * PLOGI/PRLI Completed. We must have Recv PLOGI/PRLI,
+ * Target side acked.
+ */
+ data[0] = MBS_COMMAND_COMPLETE;
+ goto logio_done;
+ }
+ data[0] = MBS_COMMAND_ERROR;
+ break;
case LSC_SCODE_NOXCB:
vha->hw->exch_starvation++;
if (vha->hw->exch_starvation > 5) {
@@ -1695,8 +1719,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
}
ql_dbg(ql_dbg_async, fcport->vha, 0x5037,
- "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x "
- "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+ "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x "
+ "iop0=%x iop1=%x.\n", type, fcport->port_name,
+ sp->handle, fcport->d_id.b.domain,
fcport->d_id.b.area, fcport->d_id.b.al_pa,
le16_to_cpu(logio->comp_status),
le32_to_cpu(logio->io_parameter[0]),
@@ -2679,7 +2704,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
return;
abt = &sp->u.iocb_cmd;
- abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle);
+ abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle);
sp->done(sp, 0);
}
@@ -2693,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
struct sts_entry_24xx *pkt;
struct qla_hw_data *ha = vha->hw;
- if (!vha->flags.online)
+ if (!ha->flags.fw_started)
return;
while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 35079f417417..a113ab3592a7 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -10,6 +10,28 @@
#include <linux/delay.h>
#include <linux/gfp.h>
+static struct mb_cmd_name {
+ uint16_t cmd;
+ const char *str;
+} mb_str[] = {
+ {MBC_GET_PORT_DATABASE, "GPDB"},
+ {MBC_GET_ID_LIST, "GIDList"},
+ {MBC_GET_LINK_PRIV_STATS, "Stats"},
+};
+
+static const char *mb_to_str(uint16_t cmd)
+{
+ int i;
+ struct mb_cmd_name *e;
+
+ for (i = 0; i < ARRAY_SIZE(mb_str); i++) {
+ e = mb_str + i;
+ if (cmd == e->cmd)
+ return e->str;
+ }
+ return "unknown";
+}
+
static struct rom_cmd {
uint16_t cmd;
} rom_cmds[] = {
@@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id,
int
qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
- dma_addr_t stats_dma, uint options)
+ dma_addr_t stats_dma, uint16_t options)
{
int rval;
mbx_cmd_t mc;
@@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088,
"Entered %s.\n", __func__);
- mcp->mb[0] = MBC_GET_LINK_PRIV_STATS;
- mcp->mb[2] = MSW(stats_dma);
- mcp->mb[3] = LSW(stats_dma);
- mcp->mb[6] = MSW(MSD(stats_dma));
- mcp->mb[7] = LSW(MSD(stats_dma));
- mcp->mb[8] = sizeof(struct link_statistics) / 4;
- mcp->mb[9] = vha->vp_idx;
- mcp->mb[10] = options;
- mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0;
- mcp->in_mb = MBX_2|MBX_1|MBX_0;
- mcp->tov = MBX_TOV_SECONDS;
- mcp->flags = IOCTL_CMD;
- rval = qla2x00_mailbox_command(vha, mcp);
+ memset(&mc, 0, sizeof(mc));
+ mc.mb[0] = MBC_GET_LINK_PRIV_STATS;
+ mc.mb[2] = MSW(stats_dma);
+ mc.mb[3] = LSW(stats_dma);
+ mc.mb[6] = MSW(MSD(stats_dma));
+ mc.mb[7] = LSW(MSD(stats_dma));
+ mc.mb[8] = sizeof(struct link_statistics) / 4;
+ mc.mb[9] = cpu_to_le16(vha->vp_idx);
+ mc.mb[10] = cpu_to_le16(options);
+
+ rval = qla24xx_send_mb_cmd(vha, &mc);
if (rval == QLA_SUCCESS) {
if (mcp->mb[0] != MBS_COMMAND_COMPLETE) {
@@ -3603,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
scsi_qla_host_t *vp = NULL;
unsigned long flags;
int found;
+ port_id_t id;
ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6,
"Entered %s.\n", __func__);
@@ -3610,28 +3631,27 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
if (rptid_entry->entry_status != 0)
return;
+ id.b.domain = rptid_entry->port_id[2];
+ id.b.area = rptid_entry->port_id[1];
+ id.b.al_pa = rptid_entry->port_id[0];
+ id.b.rsvd_1 = 0;
+
if (rptid_entry->format == 0) {
/* loop */
- ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7,
+ ql_dbg(ql_dbg_async, vha, 0x10b7,
"Format 0 : Number of VPs setup %d, number of "
"VPs acquired %d.\n", rptid_entry->vp_setup,
rptid_entry->vp_acquired);
- ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8,
+ ql_dbg(ql_dbg_async, vha, 0x10b8,
"Primary port id %02x%02x%02x.\n",
rptid_entry->port_id[2], rptid_entry->port_id[1],
rptid_entry->port_id[0]);
- vha->d_id.b.domain = rptid_entry->port_id[2];
- vha->d_id.b.area = rptid_entry->port_id[1];
- vha->d_id.b.al_pa = rptid_entry->port_id[0];
-
- spin_lock_irqsave(&ha->vport_slock, flags);
- qlt_update_vp_map(vha, SET_AL_PA);
- spin_unlock_irqrestore(&ha->vport_slock, flags);
+ qlt_update_host_map(vha, id);
} else if (rptid_entry->format == 1) {
/* fabric */
- ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9,
+ ql_dbg(ql_dbg_async, vha, 0x10b9,
"Format 1: VP[%d] enabled - status %d - with "
"port id %02x%02x%02x.\n", rptid_entry->vp_idx,
rptid_entry->vp_status,
@@ -3653,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
WWN_SIZE);
}
- vha->d_id.b.domain = rptid_entry->port_id[2];
- vha->d_id.b.area = rptid_entry->port_id[1];
- vha->d_id.b.al_pa = rptid_entry->port_id[0];
- spin_lock_irqsave(&ha->vport_slock, flags);
- qlt_update_vp_map(vha, SET_AL_PA);
- spin_unlock_irqrestore(&ha->vport_slock, flags);
+ qlt_update_host_map(vha, id);
}
fc_host_port_name(vha->host) =
@@ -3694,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
if (!found)
return;
- vp->d_id.b.domain = rptid_entry->port_id[2];
- vp->d_id.b.area = rptid_entry->port_id[1];
- vp->d_id.b.al_pa = rptid_entry->port_id[0];
- spin_lock_irqsave(&ha->vport_slock, flags);
- qlt_update_vp_map(vp, SET_AL_PA);
- spin_unlock_irqrestore(&ha->vport_slock, flags);
+ qlt_update_host_map(vp, id);
/*
* Cannot configure here as we are still sitting on the
@@ -5827,3 +5837,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha,
return rval;
}
+
+static void qla2x00_async_mb_sp_done(void *s, int res)
+{
+ struct srb *sp = s;
+
+ sp->u.iocb_cmd.u.mbx.rc = res;
+
+ complete(&sp->u.iocb_cmd.u.mbx.comp);
+ /* don't free sp here. Let the caller do the free */
+}
+
+/*
+ * This mailbox uses the iocb interface to send MB command.
+ * This allows non-critical (non chip setup) commands to go
+ * out in parallel.
+ */
+int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp)
+{
+ int rval = QLA_FUNCTION_FAILED;
+ srb_t *sp;
+ struct srb_iocb *c;
+
+ if (!vha->hw->flags.fw_started)
+ goto done;
+
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_MB_IOCB;
+ sp->name = mb_to_str(mcp->mb[0]);
+
+ qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+
+ memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG);
+
+ c = &sp->u.iocb_cmd;
+ c->timeout = qla2x00_async_iocb_timeout;
+ init_completion(&c->u.mbx.comp);
+
+ sp->done = qla2x00_async_mb_sp_done;
+
+ rval = qla2x00_start_sp(sp);
+ if (rval != QLA_SUCCESS) {
+ ql_dbg(ql_dbg_mbx, vha, 0xffff,
+ "%s: %s Failed submission. %x.\n",
+ __func__, sp->name, rval);
+ goto done_free_sp;
+ }
+
+ ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n",
+ sp->name, sp->handle);
+
+ wait_for_completion(&c->u.mbx.comp);
+ memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG);
+
+ rval = c->u.mbx.rc;
+ switch (rval) {
+ case QLA_FUNCTION_TIMEOUT:
+ ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n",
+ __func__, sp->name, rval);
+ break;
+ case QLA_SUCCESS:
+ ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n",
+ __func__, sp->name);
+ sp->free(sp);
+ break;
+ default:
+ ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. %x.\n",
+ __func__, sp->name, rval);
+ sp->free(sp);
+ break;
+ }
+
+ return rval;
+
+done_free_sp:
+ sp->free(sp);
+done:
+ return rval;
+}
+
+/*
+ * qla24xx_gpdb_wait
+ * NOTE: Do not call this routine from DPC thread
+ */
+int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+{
+ int rval = QLA_FUNCTION_FAILED;
+ dma_addr_t pd_dma;
+ struct port_database_24xx *pd;
+ struct qla_hw_data *ha = vha->hw;
+ mbx_cmd_t mc;
+
+ if (!vha->hw->flags.fw_started)
+ goto done;
+
+ pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
+ if (pd == NULL) {
+ ql_log(ql_log_warn, vha, 0xffff,
+ "Failed to allocate port database structure.\n");
+ goto done_free_sp;
+ }
+ memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE));
+
+ memset(&mc, 0, sizeof(mc));
+ mc.mb[0] = MBC_GET_PORT_DATABASE;
+ mc.mb[1] = cpu_to_le16(fcport->loop_id);
+ mc.mb[2] = MSW(pd_dma);
+ mc.mb[3] = LSW(pd_dma);
+ mc.mb[6] = MSW(MSD(pd_dma));
+ mc.mb[7] = LSW(MSD(pd_dma));
+ mc.mb[9] = cpu_to_le16(vha->vp_idx);
+ mc.mb[10] = cpu_to_le16((uint16_t)opt);
+
+ rval = qla24xx_send_mb_cmd(vha, &mc);
+ if (rval != QLA_SUCCESS) {
+ ql_dbg(ql_dbg_mbx, vha, 0xffff,
+ "%s: %8phC fail\n", __func__, fcport->port_name);
+ goto done_free_sp;
+ }
+
+ rval = __qla24xx_parse_gpdb(vha, fcport, pd);
+
+ ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n",
+ __func__, fcport->port_name);
+
+done_free_sp:
+ if (pd)
+ dma_pool_free(ha->s_dma_pool, pd, pd_dma);
+done:
+ return rval;
+}
+
+int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport,
+ struct port_database_24xx *pd)
+{
+ int rval = QLA_SUCCESS;
+ uint64_t zero = 0;
+
+ /* Check for logged in state. */
+ if (pd->current_login_state != PDS_PRLI_COMPLETE &&
+ pd->last_login_state != PDS_PRLI_COMPLETE) {
+ ql_dbg(ql_dbg_mbx, vha, 0xffff,
+ "Unable to verify login-state (%x/%x) for "
+ "loop_id %x.\n", pd->current_login_state,
+ pd->last_login_state, fcport->loop_id);
+ rval = QLA_FUNCTION_FAILED;
+ goto gpd_error_out;
+ }
+
+ if (fcport->loop_id == FC_NO_LOOP_ID ||
+ (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
+ memcmp(fcport->port_name, pd->port_name, 8))) {
+ /* We lost the device mid way. */
+ rval = QLA_NOT_LOGGED_IN;
+ goto gpd_error_out;
+ }
+
+ /* Names are little-endian. */
+ memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
+ memcpy(fcport->port_name, pd->port_name, WWN_SIZE);
+
+ /* Get port_id of device. */
+ fcport->d_id.b.domain = pd->port_id[0];
+ fcport->d_id.b.area = pd->port_id[1];
+ fcport->d_id.b.al_pa = pd->port_id[2];
+ fcport->d_id.b.rsvd_1 = 0;
+
+ /* If not target must be initiator or unknown type. */
+ if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
+ fcport->port_type = FCT_INITIATOR;
+ else
+ fcport->port_type = FCT_TARGET;
+
+ /* Passback COS information. */
+ fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
+ FC_COS_CLASS2 : FC_COS_CLASS3;
+
+ if (pd->prli_svc_param_word_3[0] & BIT_7) {
+ fcport->flags |= FCF_CONF_COMP_SUPPORTED;
+ fcport->conf_compl_supported = 1;
+ }
+
+gpd_error_out:
+ return rval;
+}
+
+/*
+ * qla24xx_gidlist_wait
+ * NOTE: don't call this routine from DPC thread.
+ */
+int qla24xx_gidlist_wait(struct scsi_qla_host *vha,
+ void *id_list, dma_addr_t id_list_dma, uint16_t *entries)
+{
+ int rval = QLA_FUNCTION_FAILED;
+ mbx_cmd_t mc;
+
+ if (!vha->hw->flags.fw_started)
+ goto done;
+
+ memset(&mc, 0, sizeof(mc));
+ mc.mb[0] = MBC_GET_ID_LIST;
+ mc.mb[2] = MSW(id_list_dma);
+ mc.mb[3] = LSW(id_list_dma);
+ mc.mb[6] = MSW(MSD(id_list_dma));
+ mc.mb[7] = LSW(MSD(id_list_dma));
+ mc.mb[8] = 0;
+ mc.mb[9] = cpu_to_le16(vha->vp_idx);
+
+ rval = qla24xx_send_mb_cmd(vha, &mc);
+ if (rval != QLA_SUCCESS) {
+ ql_dbg(ql_dbg_mbx, vha, 0xffff,
+ "%s: fail\n", __func__);
+ } else {
+ *entries = mc.mb[1];
+ ql_dbg(ql_dbg_mbx, vha, 0xffff,
+ "%s: done\n", __func__);
+ }
+done:
+ return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index c6d6f0d912ff..09a490c98763 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
* ensures no active vp_list traversal while the vport is removed
* from the queue)
*/
- spin_lock_irqsave(&ha->vport_slock, flags);
- while (atomic_read(&vha->vref_count)) {
- spin_unlock_irqrestore(&ha->vport_slock, flags);
-
- msleep(500);
+ wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count),
+ 10*HZ);
- spin_lock_irqsave(&ha->vport_slock, flags);
+ spin_lock_irqsave(&ha->vport_slock, flags);
+ if (atomic_read(&vha->vref_count)) {
+ ql_dbg(ql_dbg_vport, vha, 0xfffa,
+ "vha->vref_count=%u timeout\n", vha->vref_count.counter);
+ vha->vref_count = (atomic_t)ATOMIC_INIT(0);
}
list_del(&vha->list);
qlt_update_vp_map(vha, RESET_VP_IDX);
@@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
spin_lock_irqsave(&ha->vport_slock, flags);
atomic_dec(&vha->vref_count);
+ wake_up(&vha->vref_waitq);
}
i++;
}
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 1fed235a1b4a..41d5b09f7326 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2560,6 +2560,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
return atomic_read(&vha->loop_state) == LOOP_READY;
}
+static void qla2x00_iocb_work_fn(struct work_struct *work)
+{
+ struct scsi_qla_host *vha = container_of(work,
+ struct scsi_qla_host, iocb_work);
+ int cnt = 0;
+
+ while (!list_empty(&vha->work_list)) {
+ qla2x00_do_work(vha);
+ cnt++;
+ if (cnt > 10)
+ break;
+ }
+}
+
/*
* PCI driver interface
*/
@@ -3078,6 +3092,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
*/
qla2xxx_wake_dpc(base_vha);
+ INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error);
if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) {
@@ -3469,6 +3484,7 @@ qla2x00_remove_one(struct pci_dev *pdev)
qla2x00_free_sysfs_attr(base_vha, true);
fc_remove_host(base_vha->host);
+ qlt_remove_target_resources(ha);
scsi_remove_host(base_vha->host);
@@ -4268,6 +4284,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
spin_lock_init(&vha->work_lock);
spin_lock_init(&vha->cmd_list_lock);
init_waitqueue_head(&vha->fcport_waitQ);
+ init_waitqueue_head(&vha->vref_waitq);
vha->gnl.size = sizeof(struct get_name_list_extended) *
(ha->max_loop_id + 1);
@@ -4319,7 +4336,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
spin_lock_irqsave(&vha->work_lock, flags);
list_add_tail(&e->list, &vha->work_list);
spin_unlock_irqrestore(&vha->work_lock, flags);
- qla2xxx_wake_dpc(vha);
+
+ if (QLA_EARLY_LINKUP(vha->hw))
+ schedule_work(&vha->iocb_work);
+ else
+ qla2xxx_wake_dpc(vha);
return QLA_SUCCESS;
}
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 45f5077684f0..0e03ca2ab3e5 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha,
static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha,
fc_port_t *fcport, bool local);
void qlt_unreg_sess(struct fc_port *sess);
+static void qlt_24xx_handle_abts(struct scsi_qla_host *,
+ struct abts_recv_from_24xx *);
+
/*
* Global Variables
*/
@@ -140,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq;
static DEFINE_MUTEX(qla_tgt_mutex);
static LIST_HEAD(qla_tgt_glist);
+static const char *prot_op_str(u32 prot_op)
+{
+ switch (prot_op) {
+ case TARGET_PROT_NORMAL: return "NORMAL";
+ case TARGET_PROT_DIN_INSERT: return "DIN_INSERT";
+ case TARGET_PROT_DOUT_INSERT: return "DOUT_INSERT";
+ case TARGET_PROT_DIN_STRIP: return "DIN_STRIP";
+ case TARGET_PROT_DOUT_STRIP: return "DOUT_STRIP";
+ case TARGET_PROT_DIN_PASS: return "DIN_PASS";
+ case TARGET_PROT_DOUT_PASS: return "DOUT_PASS";
+ default: return "UNKNOWN";
+ }
+}
+
/* This API intentionally takes dest as a parameter, rather than returning
* int value to avoid caller forgetting to issue wmb() after the store */
void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest)
@@ -170,21 +187,23 @@ static inline
struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
uint8_t *d_id)
{
- struct qla_hw_data *ha = vha->hw;
- uint8_t vp_idx;
-
- if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0]))
- return NULL;
+ struct scsi_qla_host *host;
+ uint32_t key = 0;
- if (vha->d_id.b.al_pa == d_id[2])
+ if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) &&
+ (vha->d_id.b.al_pa == d_id[2]))
return vha;
- BUG_ON(ha->tgt.tgt_vp_map == NULL);
- vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx;
- if (likely(test_bit(vp_idx, ha->vp_idx_map)))
- return ha->tgt.tgt_vp_map[vp_idx].vha;
+ key = (uint32_t)d_id[0] << 16;
+ key |= (uint32_t)d_id[1] << 8;
+ key |= (uint32_t)d_id[2];
- return NULL;
+ host = btree_lookup32(&vha->hw->tgt.host_map, key);
+ if (!host)
+ ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+ "Unable to find host %06x\n", key);
+
+ return host;
}
static inline
@@ -389,6 +408,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
(struct abts_recv_from_24xx *)atio;
struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha,
entry->vp_index);
+ unsigned long flags;
+
if (unlikely(!host)) {
ql_dbg(ql_dbg_tgt, vha, 0xffff,
"qla_target(%d): Response pkt (ABTS_RECV_24XX) "
@@ -396,9 +417,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
vha->vp_idx, entry->vp_index);
break;
}
- qlt_response_pkt(host, (response_t *)atio);
+ if (!ha_locked)
+ spin_lock_irqsave(&host->hw->hardware_lock, flags);
+ qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio);
+ if (!ha_locked)
+ spin_unlock_irqrestore(&host->hw->hardware_lock, flags);
break;
-
}
/* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */
@@ -554,6 +578,7 @@ void qla2x00_async_nack_sp_done(void *s, int res)
sp->fcport->login_gen++;
sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
sp->fcport->logout_on_delete = 1;
+ sp->fcport->plogi_nack_done_deadline = jiffies + HZ;
break;
case SRB_NACK_PRLI:
@@ -613,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
break;
case SRB_NACK_PRLI:
fcport->fw_login_state = DSC_LS_PRLI_PEND;
+ fcport->deleted = 0;
c = "PRLI";
break;
case SRB_NACK_LOGO:
@@ -1215,7 +1241,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
}
/* Get list of logged in devices */
- rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries);
+ rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries);
if (rc != QLA_SUCCESS) {
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045,
"qla_target(%d): get_id_list() failed: %x\n",
@@ -1551,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha,
request_t *pkt;
struct nack_to_isp *nack;
+ if (!ha->flags.fw_started)
+ return;
+
ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK (ha=%p)\n", ha);
/* Send marker if required */
@@ -2013,6 +2042,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd)
}
EXPORT_SYMBOL(qlt_free_mcmd);
+/*
+ * ha->hardware_lock supposed to be held on entry. Might drop it, then
+ * reacquire
+ */
+void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
+ uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq)
+{
+ struct atio_from_isp *atio = &cmd->atio;
+ struct ctio7_to_24xx *ctio;
+ uint16_t temp;
+
+ ql_dbg(ql_dbg_tgt_dif, vha, 0x3066,
+ "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, "
+ "sense_key=%02x, asc=%02x, ascq=%02x",
+ vha, atio, scsi_status, sense_key, asc, ascq);
+
+ ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL);
+ if (!ctio) {
+ ql_dbg(ql_dbg_async, vha, 0x3067,
+ "qla2x00t(%ld): %s failed: unable to allocate request packet",
+ vha->host_no, __func__);
+ goto out;
+ }
+
+ ctio->entry_type = CTIO_TYPE7;
+ ctio->entry_count = 1;
+ ctio->handle = QLA_TGT_SKIP_HANDLE;
+ ctio->nport_handle = cpu_to_le16(cmd->sess->loop_id);
+ ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
+ ctio->vp_index = vha->vp_idx;
+ ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
+ ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
+ ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+ ctio->exchange_addr = atio->u.isp24.exchange_addr;
+ ctio->u.status1.flags = (atio->u.isp24.attr << 9) |
+ cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS);
+ temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id);
+ ctio->u.status1.ox_id = cpu_to_le16(temp);
+ ctio->u.status1.scsi_status =
+ cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status);
+ ctio->u.status1.response_len = cpu_to_le16(18);
+ ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio));
+
+ if (ctio->u.status1.residual != 0)
+ ctio->u.status1.scsi_status |=
+ cpu_to_le16(SS_RESIDUAL_UNDER);
+
+ /* Response code and sense key (32-bit word 0 of sense_data) */
+ put_unaligned_le32(((0x70 << 24) | (sense_key << 8)),
+ &((uint32_t *)ctio->u.status1.sense_data)[0]);
+ /* Additional sense length (32-bit word 1 of sense_data) */
+ put_unaligned_le32(0x0a, &((uint32_t *)ctio->u.status1.sense_data)[1]);
+ /* ASC and ASCQ (32-bit word 3 of sense_data) */
+ put_unaligned_le32(((asc << 24) | (ascq << 16)),
+ &((uint32_t *)ctio->u.status1.sense_data)[3]);
+
+ /* Memory Barrier */
+ wmb();
+
+ qla2x00_start_iocbs(vha, vha->req);
+out:
+ return;
+}
+
/* callback from target fabric module code */
void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd)
{
@@ -2261,7 +2354,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm,
*/
return -EAGAIN;
} else
- ha->tgt.cmds[h-1] = prm->cmd;
+ ha->tgt.cmds[h - 1] = prm->cmd;
pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK;
pkt->nport_handle = prm->cmd->loop_id;
@@ -2391,6 +2484,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd)
return cmd->bufflen > 0;
}
+static void qlt_print_dif_err(struct qla_tgt_prm *prm)
+{
+ struct qla_tgt_cmd *cmd;
+ struct scsi_qla_host *vha;
+
+ /* asc 0x10=dif error */
+ if (prm->sense_buffer && (prm->sense_buffer[12] == 0x10)) {
+ cmd = prm->cmd;
+ vha = cmd->vha;
+ /* ASCQ */
+ switch (prm->sense_buffer[13]) {
+ case 1:
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+ "se_cmd=%p tag[%x]",
+ cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+ cmd->atio.u.isp24.exchange_addr);
+ break;
+ case 2:
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+ "se_cmd=%p tag[%x]",
+ cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+ cmd->atio.u.isp24.exchange_addr);
+ break;
+ case 3:
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+ "se_cmd=%p tag[%x]",
+ cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+ cmd->atio.u.isp24.exchange_addr);
+ break;
+ default:
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "BE detected Dif ERR: lba[%llx|%lld] len[%x] "
+ "se_cmd=%p tag[%x]",
+ cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+ cmd->atio.u.isp24.exchange_addr);
+ break;
+ }
+ ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16);
+ }
+}
+
/*
* Called without ha->hardware_lock held
*/
@@ -2512,18 +2649,9 @@ skip_explict_conf:
for (i = 0; i < prm->sense_buffer_len/4; i++)
((uint32_t *)ctio->u.status1.sense_data)[i] =
cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]);
-#if 0
- if (unlikely((prm->sense_buffer_len % 4) != 0)) {
- static int q;
- if (q < 10) {
- ql_dbg(ql_dbg_tgt, vha, 0xe04f,
- "qla_target(%d): %d bytes of sense "
- "lost", prm->tgt->ha->vp_idx,
- prm->sense_buffer_len % 4);
- q++;
- }
- }
-#endif
+
+ qlt_print_dif_err(prm);
+
} else {
ctio->u.status1.flags &=
~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0);
@@ -2537,19 +2665,9 @@ skip_explict_conf:
/* Sense with len > 24, is it possible ??? */
}
-
-
-/* diff */
static inline int
qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
{
- /*
- * Uncomment when corresponding SCSI changes are done.
- *
- if (!sp->cmd->prot_chk)
- return 0;
- *
- */
switch (se_cmd->prot_op) {
case TARGET_PROT_DOUT_INSERT:
case TARGET_PROT_DIN_STRIP:
@@ -2570,16 +2688,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
return 0;
}
+static inline int
+qla_tgt_ref_mask_check(struct se_cmd *se_cmd)
+{
+ switch (se_cmd->prot_op) {
+ case TARGET_PROT_DIN_INSERT:
+ case TARGET_PROT_DOUT_INSERT:
+ case TARGET_PROT_DIN_STRIP:
+ case TARGET_PROT_DOUT_STRIP:
+ case TARGET_PROT_DIN_PASS:
+ case TARGET_PROT_DOUT_PASS:
+ return 1;
+ default:
+ return 0;
+ }
+ return 0;
+}
+
/*
- * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command
- *
+ * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command
*/
-static inline void
-qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
+static void
+qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx,
+ uint16_t *pfw_prot_opts)
{
+ struct se_cmd *se_cmd = &cmd->se_cmd;
uint32_t lba = 0xffffffff & se_cmd->t_task_lba;
+ scsi_qla_host_t *vha = cmd->tgt->vha;
+ struct qla_hw_data *ha = vha->hw;
+ uint32_t t32 = 0;
- /* wait til Mode Sense/Select cmd, modepage Ah, subpage 2
+ /*
+ * wait till Mode Sense/Select cmd, modepage Ah, subpage 2
* have been immplemented by TCM, before AppTag is avail.
* Look for modesense_handlers[]
*/
@@ -2587,65 +2727,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
ctx->app_tag_mask[0] = 0x0;
ctx->app_tag_mask[1] = 0x0;
+ if (IS_PI_UNINIT_CAPABLE(ha)) {
+ if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) ||
+ (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT))
+ *pfw_prot_opts |= PO_DIS_VALD_APP_ESC;
+ else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT)
+ *pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC;
+ }
+
+ t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts);
+
switch (se_cmd->prot_type) {
case TARGET_DIF_TYPE0_PROT:
/*
- * No check for ql2xenablehba_err_chk, as it would be an
- * I/O error if hba tag generation is not done.
+ * No check for ql2xenablehba_err_chk, as it
+ * would be an I/O error if hba tag generation
+ * is not done.
*/
ctx->ref_tag = cpu_to_le32(lba);
-
- if (!qlt_hba_err_chk_enabled(se_cmd))
- break;
-
/* enable ALL bytes of the ref tag */
ctx->ref_tag_mask[0] = 0xff;
ctx->ref_tag_mask[1] = 0xff;
ctx->ref_tag_mask[2] = 0xff;
ctx->ref_tag_mask[3] = 0xff;
break;
- /*
- * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
- * 16 bit app tag.
- */
case TARGET_DIF_TYPE1_PROT:
- ctx->ref_tag = cpu_to_le32(lba);
-
- if (!qlt_hba_err_chk_enabled(se_cmd))
- break;
-
- /* enable ALL bytes of the ref tag */
- ctx->ref_tag_mask[0] = 0xff;
- ctx->ref_tag_mask[1] = 0xff;
- ctx->ref_tag_mask[2] = 0xff;
- ctx->ref_tag_mask[3] = 0xff;
- break;
- /*
- * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to
- * match LBA in CDB + N
- */
+ /*
+ * For TYPE 1 protection: 16 bit GUARD tag, 32 bit
+ * REF tag, and 16 bit app tag.
+ */
+ ctx->ref_tag = cpu_to_le32(lba);
+ if (!qla_tgt_ref_mask_check(se_cmd) ||
+ !(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+ *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+ break;
+ }
+ /* enable ALL bytes of the ref tag */
+ ctx->ref_tag_mask[0] = 0xff;
+ ctx->ref_tag_mask[1] = 0xff;
+ ctx->ref_tag_mask[2] = 0xff;
+ ctx->ref_tag_mask[3] = 0xff;
+ break;
case TARGET_DIF_TYPE2_PROT:
- ctx->ref_tag = cpu_to_le32(lba);
-
- if (!qlt_hba_err_chk_enabled(se_cmd))
- break;
-
- /* enable ALL bytes of the ref tag */
- ctx->ref_tag_mask[0] = 0xff;
- ctx->ref_tag_mask[1] = 0xff;
- ctx->ref_tag_mask[2] = 0xff;
- ctx->ref_tag_mask[3] = 0xff;
- break;
-
- /* For Type 3 protection: 16 bit GUARD only */
+ /*
+ * For TYPE 2 protection: 16 bit GUARD + 32 bit REF
+ * tag has to match LBA in CDB + N
+ */
+ ctx->ref_tag = cpu_to_le32(lba);
+ if (!qla_tgt_ref_mask_check(se_cmd) ||
+ !(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+ *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+ break;
+ }
+ /* enable ALL bytes of the ref tag */
+ ctx->ref_tag_mask[0] = 0xff;
+ ctx->ref_tag_mask[1] = 0xff;
+ ctx->ref_tag_mask[2] = 0xff;
+ ctx->ref_tag_mask[3] = 0xff;
+ break;
case TARGET_DIF_TYPE3_PROT:
- ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
- ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
- break;
+ /* For TYPE 3 protection: 16 bit GUARD only */
+ *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+ ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
+ ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
+ break;
}
}
-
static inline int
qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
{
@@ -2664,6 +2812,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
struct se_cmd *se_cmd = &cmd->se_cmd;
uint32_t h;
struct atio_from_isp *atio = &prm->cmd->atio;
+ struct qla_tc_param tc;
uint16_t t16;
ha = vha->hw;
@@ -2689,16 +2838,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
case TARGET_PROT_DIN_INSERT:
case TARGET_PROT_DOUT_STRIP:
transfer_length = data_bytes;
- data_bytes += dif_bytes;
+ if (cmd->prot_sg_cnt)
+ data_bytes += dif_bytes;
break;
-
case TARGET_PROT_DIN_STRIP:
case TARGET_PROT_DOUT_INSERT:
case TARGET_PROT_DIN_PASS:
case TARGET_PROT_DOUT_PASS:
transfer_length = data_bytes + dif_bytes;
break;
-
default:
BUG();
break;
@@ -2734,7 +2882,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
break;
}
-
/* ---- PKT ---- */
/* Update entry type to indicate Command Type CRC_2 IOCB */
pkt->entry_type = CTIO_CRC2;
@@ -2752,9 +2899,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
} else
ha->tgt.cmds[h-1] = prm->cmd;
-
pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK;
- pkt->nport_handle = prm->cmd->loop_id;
+ pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id);
pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
@@ -2775,12 +2921,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
else if (cmd->dma_data_direction == DMA_FROM_DEVICE)
pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT);
-
pkt->dseg_count = prm->tot_dsds;
/* Fibre channel byte count */
pkt->transfer_length = cpu_to_le32(transfer_length);
-
/* ----- CRC context -------- */
/* Allocate CRC context from global pool */
@@ -2800,13 +2944,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
/* Set handle */
crc_ctx_pkt->handle = pkt->handle;
- qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt);
+ qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts);
pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma));
pkt->crc_context_len = CRC_CONTEXT_LEN_FW;
-
if (!bundling) {
cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address;
} else {
@@ -2827,16 +2970,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes);
crc_ctx_pkt->guard_seed = cpu_to_le16(0);
+ memset((uint8_t *)&tc, 0 , sizeof(tc));
+ tc.vha = vha;
+ tc.blk_sz = cmd->blk_sz;
+ tc.bufflen = cmd->bufflen;
+ tc.sg = cmd->sg;
+ tc.prot_sg = cmd->prot_sg;
+ tc.ctx = crc_ctx_pkt;
+ tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced;
/* Walks data segments */
pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR);
if (!bundling && prm->prot_seg_cnt) {
if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd,
- prm->tot_dsds, cmd))
+ prm->tot_dsds, &tc))
goto crc_queuing_error;
} else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd,
- (prm->tot_dsds - prm->prot_seg_cnt), cmd))
+ (prm->tot_dsds - prm->prot_seg_cnt), &tc))
goto crc_queuing_error;
if (bundling && prm->prot_seg_cnt) {
@@ -2845,18 +2996,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
- prm->prot_seg_cnt, cmd))
+ prm->prot_seg_cnt, &tc))
goto crc_queuing_error;
}
return QLA_SUCCESS;
crc_queuing_error:
/* Cleanup will be performed by the caller */
+ vha->hw->tgt.cmds[h - 1] = NULL;
return QLA_FUNCTION_FAILED;
}
-
/*
* Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and *
* QLA_TGT_XMIT_STATUS for >= 24xx silicon
@@ -2906,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
else
vha->tgt_counters.core_qla_que_buf++;
- if (!vha->flags.online || cmd->reset_count != ha->chip_reset) {
+ if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) {
/*
* Either the port is not online or this request was from
* previous life, just abort the processing.
@@ -3047,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
spin_lock_irqsave(&ha->hardware_lock, flags);
- if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) ||
+ if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) ||
(cmd->sess && cmd->sess->deleted)) {
/*
* Either the port is not online or this request was from
@@ -3104,139 +3255,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer);
/*
- * Checks the guard or meta-data for the type of error
- * detected by the HBA.
+ * it is assumed either hardware_lock or qpair lock is held.
*/
-static inline int
+static void
qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd,
- struct ctio_crc_from_fw *sts)
+ struct ctio_crc_from_fw *sts)
{
uint8_t *ap = &sts->actual_dif[0];
uint8_t *ep = &sts->expected_dif[0];
- uint32_t e_ref_tag, a_ref_tag;
- uint16_t e_app_tag, a_app_tag;
- uint16_t e_guard, a_guard;
uint64_t lba = cmd->se_cmd.t_task_lba;
+ uint8_t scsi_status, sense_key, asc, ascq;
+ unsigned long flags;
- a_guard = be16_to_cpu(*(uint16_t *)(ap + 0));
- a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
- a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
-
- e_guard = be16_to_cpu(*(uint16_t *)(ep + 0));
- e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
- e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
-
- ql_dbg(ql_dbg_tgt, vha, 0xe075,
- "iocb(s) %p Returned STATUS.\n", sts);
-
- ql_dbg(ql_dbg_tgt, vha, 0xf075,
- "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n",
- cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
- a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard);
-
- /*
- * Ignore sector if:
- * For type 3: ref & app tag is all 'f's
- * For type 0,1,2: app tag is all 'f's
- */
- if ((a_app_tag == 0xffff) &&
- ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) ||
- (a_ref_tag == 0xffffffff))) {
- uint32_t blocks_done;
-
- /* 2TB boundary case covered automatically with this */
- blocks_done = e_ref_tag - (uint32_t)lba + 1;
- cmd->se_cmd.bad_sector = e_ref_tag;
- cmd->se_cmd.pi_err = 0;
- ql_dbg(ql_dbg_tgt, vha, 0xf074,
- "need to return scsi good\n");
-
- /* Update protection tag */
- if (cmd->prot_sg_cnt) {
- uint32_t i, k = 0, num_ent;
- struct scatterlist *sg, *sgl;
-
-
- sgl = cmd->prot_sg;
-
- /* Patch the corresponding protection tags */
- for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) {
- num_ent = sg_dma_len(sg) / 8;
- if (k + num_ent < blocks_done) {
- k += num_ent;
- continue;
- }
- k = blocks_done;
- break;
- }
+ cmd->trc_flags |= TRC_DIF_ERR;
- if (k != blocks_done) {
- ql_log(ql_log_warn, vha, 0xf076,
- "unexpected tag values tag:lba=%u:%llu)\n",
- e_ref_tag, (unsigned long long)lba);
- goto out;
- }
+ cmd->a_guard = be16_to_cpu(*(uint16_t *)(ap + 0));
+ cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
+ cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
-#if 0
- struct sd_dif_tuple *spt;
- /* TODO:
- * This section came from initiator. Is it valid here?
- * should ulp be override with actual val???
- */
- spt = page_address(sg_page(sg)) + sg->offset;
- spt += j;
+ cmd->e_guard = be16_to_cpu(*(uint16_t *)(ep + 0));
+ cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
+ cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
- spt->app_tag = 0xffff;
- if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3)
- spt->ref_tag = 0xffffffff;
-#endif
- }
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xf075,
+ "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state);
- return 0;
- }
+ scsi_status = sense_key = asc = ascq = 0;
- /* check guard */
- if (e_guard != a_guard) {
- cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
- cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
- ql_log(ql_log_warn, vha, 0xe076,
- "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
- cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
- a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
- a_guard, e_guard, cmd);
- goto out;
+ /* check appl tag */
+ if (cmd->e_app_tag != cmd->a_app_tag) {
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+ "Ref[%x|%x], App[%x|%x], "
+ "Guard [%x|%x] cmd=%p ox_id[%04x]",
+ cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+ cmd->a_ref_tag, cmd->e_ref_tag,
+ cmd->a_app_tag, cmd->e_app_tag,
+ cmd->a_guard, cmd->e_guard,
+ cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+ cmd->dif_err_code = DIF_ERR_APP;
+ scsi_status = SAM_STAT_CHECK_CONDITION;
+ sense_key = ABORTED_COMMAND;
+ asc = 0x10;
+ ascq = 0x2;
}
/* check ref tag */
- if (e_ref_tag != a_ref_tag) {
- cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
- cmd->se_cmd.bad_sector = e_ref_tag;
-
- ql_log(ql_log_warn, vha, 0xe077,
- "Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
- cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
- a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
- a_guard, e_guard, cmd);
+ if (cmd->e_ref_tag != cmd->a_ref_tag) {
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+ "Ref[%x|%x], App[%x|%x], "
+ "Guard[%x|%x] cmd=%p ox_id[%04x] ",
+ cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+ cmd->a_ref_tag, cmd->e_ref_tag,
+ cmd->a_app_tag, cmd->e_app_tag,
+ cmd->a_guard, cmd->e_guard,
+ cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+ cmd->dif_err_code = DIF_ERR_REF;
+ scsi_status = SAM_STAT_CHECK_CONDITION;
+ sense_key = ABORTED_COMMAND;
+ asc = 0x10;
+ ascq = 0x3;
goto out;
}
- /* check appl tag */
- if (e_app_tag != a_app_tag) {
- cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
- cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
- ql_log(ql_log_warn, vha, 0xe078,
- "App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
- cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
- a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
- a_guard, e_guard, cmd);
- goto out;
+ /* check guard */
+ if (cmd->e_guard != cmd->a_guard) {
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+ "Ref[%x|%x], App[%x|%x], "
+ "Guard [%x|%x] cmd=%p ox_id[%04x]",
+ cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+ cmd->a_ref_tag, cmd->e_ref_tag,
+ cmd->a_app_tag, cmd->e_app_tag,
+ cmd->a_guard, cmd->e_guard,
+ cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+ cmd->dif_err_code = DIF_ERR_GRD;
+ scsi_status = SAM_STAT_CHECK_CONDITION;
+ sense_key = ABORTED_COMMAND;
+ asc = 0x10;
+ ascq = 0x1;
}
out:
- return 1;
-}
+ switch (cmd->state) {
+ case QLA_TGT_STATE_NEED_DATA:
+ /* handle_data will load DIF error code */
+ cmd->state = QLA_TGT_STATE_DATA_IN;
+ vha->hw->tgt.tgt_ops->handle_data(cmd);
+ break;
+ default:
+ spin_lock_irqsave(&cmd->cmd_lock, flags);
+ if (cmd->aborted) {
+ spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+ vha->hw->tgt.tgt_ops->free_cmd(cmd);
+ break;
+ }
+ spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+ qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq);
+ /* assume scsi status gets out on the wire.
+ * Will not wait for completion.
+ */
+ vha->hw->tgt.tgt_ops->free_cmd(cmd);
+ break;
+ }
+}
/* If hardware_lock held on entry, might drop it, then reaquire */
/* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
@@ -3251,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha,
ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c,
"Sending TERM ELS CTIO (ha=%p)\n", ha);
- pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL);
+ pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL);
if (pkt == NULL) {
ql_dbg(ql_dbg_tgt, vha, 0xe080,
"qla_target(%d): %s failed: unable to allocate "
@@ -3543,6 +3668,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio,
{
int term = 0;
+ if (cmd->se_cmd.prot_op)
+ ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+ "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] "
+ "se_cmd=%p tag[%x] op %#x/%s",
+ cmd->lba, cmd->lba,
+ cmd->num_blks, &cmd->se_cmd,
+ cmd->atio.u.isp24.exchange_addr,
+ cmd->se_cmd.prot_op,
+ prot_op_str(cmd->se_cmd.prot_op));
+
if (ctio != NULL) {
struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio;
term = !(c->flags &
@@ -3760,32 +3895,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
struct ctio_crc_from_fw *crc =
(struct ctio_crc_from_fw *)ctio;
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073,
- "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n",
+ "qla_target(%d): CTIO with DIF_ERROR status %x "
+ "received (state %x, ulp_cmd %p) actual_dif[0x%llx] "
+ "expect_dif[0x%llx]\n",
vha->vp_idx, status, cmd->state, se_cmd,
*((u64 *)&crc->actual_dif[0]),
*((u64 *)&crc->expected_dif[0]));
- if (qlt_handle_dif_error(vha, cmd, ctio)) {
- if (cmd->state == QLA_TGT_STATE_NEED_DATA) {
- /* scsi Write/xfer rdy complete */
- goto skip_term;
- } else {
- /* scsi read/xmit respond complete
- * call handle dif to send scsi status
- * rather than terminate exchange.
- */
- cmd->state = QLA_TGT_STATE_PROCESSED;
- ha->tgt.tgt_ops->handle_dif_err(cmd);
- return;
- }
- } else {
- /* Need to generate a SCSI good completion.
- * because FW did not send scsi status.
- */
- status = 0;
- goto skip_term;
- }
- break;
+ qlt_handle_dif_error(vha, cmd, ctio);
+ return;
}
default:
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
@@ -3808,7 +3926,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
return;
}
}
-skip_term:
if (cmd->state == QLA_TGT_STATE_PROCESSED) {
cmd->trc_flags |= TRC_CTIO_DONE;
@@ -4584,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
}
if (sess != NULL) {
- if (sess->fw_login_state == DSC_LS_PLOGI_PEND) {
+ if (sess->fw_login_state != DSC_LS_PLOGI_PEND &&
+ sess->fw_login_state != DSC_LS_PLOGI_COMP) {
/*
* Impatient initiator sent PRLI before last
* PLOGI could finish. Will force him to re-try,
@@ -4623,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
/* Make session global (not used in fabric mode) */
if (ha->current_topology != ISP_CFG_F) {
- set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
- set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
- qla2xxx_wake_dpc(vha);
+ if (sess) {
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "%s %d %8phC post nack\n",
+ __func__, __LINE__, sess->port_name);
+ qla24xx_post_nack_work(vha, sess, iocb,
+ SRB_NACK_PRLI);
+ res = 0;
+ } else {
+ set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+ set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ }
} else {
if (sess) {
ql_dbg(ql_dbg_disc, vha, 0xffff,
- "%s %d %8phC post nack\n",
- __func__, __LINE__, sess->port_name);
-
+ "%s %d %8phC post nack\n",
+ __func__, __LINE__, sess->port_name);
qla24xx_post_nack_work(vha, sess, iocb,
SRB_NACK_PRLI);
res = 0;
@@ -4639,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
}
break;
-
case ELS_TPRLO:
if (le16_to_cpu(iocb->u.isp24.flags) &
NOTIFY24XX_FLAGS_GLOBAL_TPRLO) {
@@ -5079,16 +5204,22 @@ qlt_send_busy(struct scsi_qla_host *vha,
static int
qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha,
- struct atio_from_isp *atio)
+ struct atio_from_isp *atio, bool ha_locked)
{
struct qla_hw_data *ha = vha->hw;
uint16_t status;
+ unsigned long flags;
if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha))
return 0;
+ if (!ha_locked)
+ spin_lock_irqsave(&ha->hardware_lock, flags);
status = temp_sam_status;
qlt_send_busy(vha, atio, status);
+ if (!ha_locked)
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
return 1;
}
@@ -5103,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
unsigned long flags;
if (unlikely(tgt == NULL)) {
- ql_dbg(ql_dbg_io, vha, 0x3064,
+ ql_dbg(ql_dbg_tgt, vha, 0x3064,
"ATIO pkt, but no tgt (ha %p)", ha);
return;
}
@@ -5133,7 +5264,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) {
- rc = qlt_chk_qfull_thresh_hold(vha, atio);
+ rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked);
if (rc != 0) {
tgt->atio_irq_cmd_count--;
return;
@@ -5256,7 +5387,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt)
break;
}
- rc = qlt_chk_qfull_thresh_hold(vha, atio);
+ rc = qlt_chk_qfull_thresh_hold(vha, atio, true);
if (rc != 0) {
tgt->irq_cmd_count--;
return;
@@ -5531,7 +5662,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha,
fcport->loop_id = loop_id;
- rc = qla2x00_get_port_database(vha, fcport, 0);
+ rc = qla24xx_gpdb_wait(vha, fcport, 0);
if (rc != QLA_SUCCESS) {
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070,
"qla_target(%d): Failed to retrieve fcport "
@@ -5713,30 +5844,23 @@ static void qlt_abort_work(struct qla_tgt *tgt,
}
}
- spin_lock_irqsave(&ha->hardware_lock, flags);
-
- if (tgt->tgt_stop)
- goto out_term;
-
rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess);
+ ha->tgt.tgt_ops->put_sess(sess);
+ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
+
if (rc != 0)
goto out_term;
- spin_unlock_irqrestore(&ha->hardware_lock, flags);
- if (sess)
- ha->tgt.tgt_ops->put_sess(sess);
- spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
return;
out_term2:
- spin_lock_irqsave(&ha->hardware_lock, flags);
+ if (sess)
+ ha->tgt.tgt_ops->put_sess(sess);
+ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
out_term:
+ spin_lock_irqsave(&ha->hardware_lock, flags);
qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
- if (sess)
- ha->tgt.tgt_ops->put_sess(sess);
- spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
}
static void qlt_tmr_work(struct qla_tgt *tgt,
@@ -5756,7 +5880,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
spin_lock_irqsave(&ha->tgt.sess_lock, flags);
if (tgt->tgt_stop)
- goto out_term;
+ goto out_term2;
s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id;
sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
@@ -5768,11 +5892,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
spin_lock_irqsave(&ha->tgt.sess_lock, flags);
if (!sess)
- goto out_term;
+ goto out_term2;
} else {
if (sess->deleted) {
sess = NULL;
- goto out_term;
+ goto out_term2;
}
if (!kref_get_unless_zero(&sess->sess_kref)) {
@@ -5780,7 +5904,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
"%s: kref_get fail %8phC\n",
__func__, sess->port_name);
sess = NULL;
- goto out_term;
+ goto out_term2;
}
}
@@ -5790,17 +5914,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0);
- if (rc != 0)
- goto out_term;
-
ha->tgt.tgt_ops->put_sess(sess);
spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+
+ if (rc != 0)
+ goto out_term;
return;
+out_term2:
+ if (sess)
+ ha->tgt.tgt_ops->put_sess(sess);
+ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
out_term:
qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0);
- ha->tgt.tgt_ops->put_sess(sess);
- spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
}
static void qlt_sess_work_fn(struct work_struct *work)
@@ -5893,13 +6019,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX;
tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX;
- if (base_vha->fc_vport)
- return 0;
-
mutex_lock(&qla_tgt_mutex);
list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist);
mutex_unlock(&qla_tgt_mutex);
+ if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target)
+ ha->tgt.tgt_ops->add_target(base_vha);
+
return 0;
}
@@ -5928,6 +6054,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha)
return 0;
}
+void qlt_remove_target_resources(struct qla_hw_data *ha)
+{
+ struct scsi_qla_host *node;
+ u32 key = 0;
+
+ btree_for_each_safe32(&ha->tgt.host_map, key, node)
+ btree_remove32(&ha->tgt.host_map, key);
+
+ btree_destroy32(&ha->tgt.host_map);
+}
+
static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
unsigned char *b)
{
@@ -6234,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked)
struct atio_from_isp *pkt;
int cnt, i;
- if (!vha->flags.online)
+ if (!ha->flags.fw_started)
return;
while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) ||
@@ -6581,6 +6718,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha,
void
qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
{
+ int rc;
+
if (!QLA_TGT_MODE_ENABLED())
return;
@@ -6600,6 +6739,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
qlt_unknown_atio_work_fn);
qlt_clear_mode(base_vha);
+
+ rc = btree_init32(&ha->tgt.host_map);
+ if (rc)
+ ql_log(ql_log_info, base_vha, 0xffff,
+ "Unable to initialize ha->host_map btree\n");
+
+ qlt_update_vp_map(base_vha, SET_VP_IDX);
}
irqreturn_t
@@ -6642,6 +6788,8 @@ qlt_handle_abts_recv_work(struct work_struct *work)
spin_lock_irqsave(&ha->hardware_lock, flags);
qlt_response_pkt_all_vps(vha, (response_t *)&op->atio);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ kfree(op);
}
void
@@ -6706,25 +6854,69 @@ qlt_mem_free(struct qla_hw_data *ha)
void
qlt_update_vp_map(struct scsi_qla_host *vha, int cmd)
{
+ void *slot;
+ u32 key;
+ int rc;
+
if (!QLA_TGT_MODE_ENABLED())
return;
+ key = vha->d_id.b24;
+
switch (cmd) {
case SET_VP_IDX:
vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha;
break;
case SET_AL_PA:
- vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx;
+ slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+ if (!slot) {
+ ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+ "Save vha in host_map %p %06x\n", vha, key);
+ rc = btree_insert32(&vha->hw->tgt.host_map,
+ key, vha, GFP_ATOMIC);
+ if (rc)
+ ql_log(ql_log_info, vha, 0xffff,
+ "Unable to insert s_id into host_map: %06x\n",
+ key);
+ return;
+ }
+ ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+ "replace existing vha in host_map %p %06x\n", vha, key);
+ btree_update32(&vha->hw->tgt.host_map, key, vha);
break;
case RESET_VP_IDX:
vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL;
break;
case RESET_AL_PA:
- vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0;
+ ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+ "clear vha in host_map %p %06x\n", vha, key);
+ slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+ if (slot)
+ btree_remove32(&vha->hw->tgt.host_map, key);
+ vha->d_id.b24 = 0;
break;
}
}
+void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id)
+{
+ unsigned long flags;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (!vha->d_id.b24) {
+ spin_lock_irqsave(&ha->vport_slock, flags);
+ vha->d_id = id;
+ qlt_update_vp_map(vha, SET_AL_PA);
+ spin_unlock_irqrestore(&ha->vport_slock, flags);
+ } else if (vha->d_id.b24 != id.b24) {
+ spin_lock_irqsave(&ha->vport_slock, flags);
+ qlt_update_vp_map(vha, RESET_AL_PA);
+ vha->d_id = id;
+ qlt_update_vp_map(vha, SET_AL_PA);
+ spin_unlock_irqrestore(&ha->vport_slock, flags);
+ }
+}
+
static int __init qlt_parse_ini_mode(void)
{
if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0)
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index a7f90dcaae37..d64420251194 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio)
atio->u.isp24.fcp_cmnd.add_cdb_len = 0;
}
+static inline int get_datalen_for_atio(struct atio_from_isp *atio)
+{
+ int len = atio->u.isp24.fcp_cmnd.add_cdb_len;
+
+ return (be32_to_cpu(get_unaligned((uint32_t *)
+ &atio->u.isp24.fcp_cmnd.add_cdb[len * 4])));
+}
+
#define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */
/*
@@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl {
int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *,
unsigned char *, uint32_t, int, int, int);
void (*handle_data)(struct qla_tgt_cmd *);
- void (*handle_dif_err)(struct qla_tgt_cmd *);
int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t,
uint32_t);
void (*free_cmd)(struct qla_tgt_cmd *);
@@ -684,6 +691,9 @@ struct qla_tgt_func_tmpl {
void (*clear_nacl_from_fcport_map)(struct fc_port *);
void (*put_sess)(struct fc_port *);
void (*shutdown_sess)(struct fc_port *);
+ int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts);
+ int (*chk_dif_tags)(uint32_t tag);
+ void (*add_target)(struct scsi_qla_host *);
};
int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
@@ -720,8 +730,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
#define QLA_TGT_ABORT_ALL 0xFFFE
#define QLA_TGT_NEXUS_LOSS_SESS 0xFFFD
#define QLA_TGT_NEXUS_LOSS 0xFFFC
-#define QLA_TGT_ABTS 0xFFFB
-#define QLA_TGT_2G_ABORT_TASK 0xFFFA
+#define QLA_TGT_ABTS 0xFFFB
+#define QLA_TGT_2G_ABORT_TASK 0xFFFA
/* Notify Acknowledge flags */
#define NOTIFY_ACK_RES_COUNT BIT_8
@@ -845,6 +855,7 @@ enum trace_flags {
TRC_CMD_FREE = BIT_17,
TRC_DATA_IN = BIT_18,
TRC_ABORT = BIT_19,
+ TRC_DIF_ERR = BIT_20,
};
struct qla_tgt_cmd {
@@ -862,7 +873,6 @@ struct qla_tgt_cmd {
unsigned int sg_mapped:1;
unsigned int free_sg:1;
unsigned int write_data_transferred:1;
- unsigned int ctx_dsd_alloced:1;
unsigned int q_full:1;
unsigned int term_exchg:1;
unsigned int cmd_sent_to_fw:1;
@@ -885,11 +895,25 @@ struct qla_tgt_cmd {
struct list_head cmd_list;
struct atio_from_isp atio;
- /* t10dif */
+
+ uint8_t ctx_dsd_alloced;
+
+ /* T10-DIF */
+#define DIF_ERR_NONE 0
+#define DIF_ERR_GRD 1
+#define DIF_ERR_REF 2
+#define DIF_ERR_APP 3
+ int8_t dif_err_code;
struct scatterlist *prot_sg;
uint32_t prot_sg_cnt;
- uint32_t blk_sz;
+ uint32_t blk_sz, num_blks;
+ uint8_t scsi_status, sense_key, asc, ascq;
+
struct crc_context *ctx;
+ uint8_t *cdb;
+ uint64_t lba;
+ uint16_t a_guard, e_guard, a_app_tag, e_app_tag;
+ uint32_t a_ref_tag, e_ref_tag;
uint64_t jiffies_at_alloc;
uint64_t jiffies_at_free;
@@ -1053,4 +1077,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *);
extern void qlt_logo_completion_handler(fc_port_t *, int);
extern void qlt_do_generation_tick(struct scsi_qla_host *, int *);
+void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t,
+ uint8_t, uint8_t, uint8_t);
+
#endif /* __QLA_TARGET_H */
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 3cb1964b7786..45bc84e8e3bf 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,9 +7,9 @@
/*
* Driver version
*/
-#define QLA2XXX_VERSION "8.07.00.38-k"
+#define QLA2XXX_VERSION "9.00.00.00-k"
-#define QLA_DRIVER_MAJOR_VER 8
-#define QLA_DRIVER_MINOR_VER 7
+#define QLA_DRIVER_MAJOR_VER 9
+#define QLA_DRIVER_MINOR_VER 0
#define QLA_DRIVER_PATCH_VER 0
#define QLA_DRIVER_BETA_VER 0
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 8e8ab0fa9672..7443e4efa3ae 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
return;
}
+ switch (cmd->dif_err_code) {
+ case DIF_ERR_GRD:
+ cmd->se_cmd.pi_err =
+ TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+ break;
+ case DIF_ERR_REF:
+ cmd->se_cmd.pi_err =
+ TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+ break;
+ case DIF_ERR_APP:
+ cmd->se_cmd.pi_err =
+ TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+ break;
+ case DIF_ERR_NONE:
+ default:
+ break;
+ }
+
if (cmd->se_cmd.pi_err)
transport_generic_request_failure(&cmd->se_cmd,
cmd->se_cmd.pi_err);
@@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
}
-static void tcm_qla2xxx_handle_dif_work(struct work_struct *work)
+static int tcm_qla2xxx_chk_dif_tags(uint32_t tag)
{
- struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
-
- /* take an extra kref to prevent cmd free too early.
- * need to wait for SCSI status/check condition to
- * finish responding generate by transport_generic_request_failure.
- */
- kref_get(&cmd->se_cmd.cmd_kref);
- transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err);
+ return 0;
}
-/*
- * Called from qla_target.c:qlt_do_ctio_completion()
- */
-static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd)
+static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
+ uint16_t *pfw_prot_opts)
{
- INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work);
- queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+
+ if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
+ *pfw_prot_opts |= PO_DISABLE_GUARD_CHECK;
+
+ if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG))
+ *pfw_prot_opts |= PO_DIS_APP_TAG_VALD;
+
+ return 0;
}
/*
@@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id,
static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
.handle_cmd = tcm_qla2xxx_handle_cmd,
.handle_data = tcm_qla2xxx_handle_data,
- .handle_dif_err = tcm_qla2xxx_handle_dif_err,
.handle_tmr = tcm_qla2xxx_handle_tmr,
.free_cmd = tcm_qla2xxx_free_cmd,
.free_mcmd = tcm_qla2xxx_free_mcmd,
@@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
.clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map,
.put_sess = tcm_qla2xxx_put_sess,
.shutdown_sess = tcm_qla2xxx_shutdown_sess,
+ .get_dif_tags = tcm_qla2xxx_dif_tags,
+ .chk_dif_tags = tcm_qla2xxx_chk_dif_tags,
};
static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ba2286652ff6..19125d72f322 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume);
/**
* scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
* @sdev: device to block
+ * @wait: Whether or not to wait until ongoing .queuecommand() /
+ * .queue_rq() calls have finished.
*
* Block request made by scsi lld's to temporarily stop all
* scsi commands on the specified device. May sleep.
@@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume);
* remove the rport mutex lock and unlock calls from srp_queuecommand().
*/
int
-scsi_internal_device_block(struct scsi_device *sdev)
+scsi_internal_device_block(struct scsi_device *sdev, bool wait)
{
struct request_queue *q = sdev->request_queue;
unsigned long flags;
@@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev)
* request queue.
*/
if (q->mq_ops) {
- blk_mq_quiesce_queue(q);
+ if (wait)
+ blk_mq_quiesce_queue(q);
+ else
+ blk_mq_stop_hw_queues(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
- scsi_wait_for_queuecommand(sdev);
+ if (wait)
+ scsi_wait_for_queuecommand(sdev);
}
return 0;
@@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
static void
device_block(struct scsi_device *sdev, void *data)
{
- scsi_internal_device_block(sdev);
+ scsi_internal_device_block(sdev, true);
}
static int
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 99bfc985e190..f11bd102d6d5 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { }
*/
#define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
- enum scsi_device_state new_state);
#endif /* _SCSI_PRIV_H */
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index d277e8620e3e..fcfeddc79331 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1783,6 +1783,8 @@ static int sd_done(struct scsi_cmnd *SCpnt)
{
int result = SCpnt->result;
unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
+ unsigned int sector_size = SCpnt->device->sector_size;
+ unsigned int resid;
struct scsi_sense_hdr sshdr;
struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
struct request *req = SCpnt->request;
@@ -1813,6 +1815,21 @@ static int sd_done(struct scsi_cmnd *SCpnt)
scsi_set_resid(SCpnt, blk_rq_bytes(req));
}
break;
+ default:
+ /*
+ * In case of bogus fw or device, we could end up having
+ * an unaligned partial completion. Check this here and force
+ * alignment.
+ */
+ resid = scsi_get_resid(SCpnt);
+ if (resid & (sector_size - 1)) {
+ sd_printk(KERN_INFO, sdkp,
+ "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
+ resid, sector_size);
+ resid = min(scsi_bufflen(SCpnt),
+ round_up(resid, sector_size));
+ scsi_set_resid(SCpnt, resid);
+ }
}
if (result) {
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 638e5f427c90..016639d7fef1 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels")
*/
static int storvsc_timeout = 180;
-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
-
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
static struct scsi_transport_template *fc_transport_template;
#endif
@@ -1383,6 +1381,22 @@ static int storvsc_do_io(struct hv_device *device,
return ret;
}
+static int storvsc_device_alloc(struct scsi_device *sdevice)
+{
+ /*
+ * Set blist flag to permit the reading of the VPD pages even when
+ * the target may claim SPC-2 compliance. MSFT targets currently
+ * claim SPC-2 compliance while they implement post SPC-2 features.
+ * With this flag we can correctly handle WRITE_SAME_16 issues.
+ *
+ * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
+ * still supports REPORT LUN.
+ */
+ sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
+
+ return 0;
+}
+
static int storvsc_device_configure(struct scsi_device *sdevice)
{
@@ -1396,14 +1410,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
sdevice->no_write_same = 1;
/*
- * Add blist flags to permit the reading of the VPD pages even when
- * the target may claim SPC-2 compliance. MSFT targets currently
- * claim SPC-2 compliance while they implement post SPC-2 features.
- * With this patch we can correctly handle WRITE_SAME_16 issues.
- */
- sdevice->sdev_bflags |= msft_blist_flags;
-
- /*
* If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
* if the device is a MSFT virtual device. If the host is
* WIN10 or newer, allow write_same.
@@ -1661,6 +1667,7 @@ static struct scsi_host_template scsi_driver = {
.eh_host_reset_handler = storvsc_host_reset_handler,
.proc_name = "storvsc_host",
.eh_timed_out = storvsc_eh_timed_out,
+ .slave_alloc = storvsc_device_alloc,
.slave_configure = storvsc_device_configure,
.cmd_per_lun = 255,
.this_id = -1,
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 318e4a1f76c9..54deeb754db5 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -146,7 +146,7 @@ enum attr_idn {
/* Descriptor idn for Query requests */
enum desc_idn {
QUERY_DESC_IDN_DEVICE = 0x0,
- QUERY_DESC_IDN_CONFIGURAION = 0x1,
+ QUERY_DESC_IDN_CONFIGURATION = 0x1,
QUERY_DESC_IDN_UNIT = 0x2,
QUERY_DESC_IDN_RFU_0 = 0x3,
QUERY_DESC_IDN_INTERCONNECT = 0x4,
@@ -162,19 +162,13 @@ enum desc_header_offset {
QUERY_DESC_DESC_TYPE_OFFSET = 0x01,
};
-enum ufs_desc_max_size {
- QUERY_DESC_DEVICE_MAX_SIZE = 0x40,
- QUERY_DESC_CONFIGURAION_MAX_SIZE = 0x90,
- QUERY_DESC_UNIT_MAX_SIZE = 0x23,
- QUERY_DESC_INTERCONNECT_MAX_SIZE = 0x06,
- /*
- * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes
- * of descriptor header.
- */
- QUERY_DESC_STRING_MAX_SIZE = 0xFE,
- QUERY_DESC_GEOMETRY_MAX_SIZE = 0x44,
- QUERY_DESC_POWER_MAX_SIZE = 0x62,
- QUERY_DESC_RFU_MAX_SIZE = 0x00,
+enum ufs_desc_def_size {
+ QUERY_DESC_DEVICE_DEF_SIZE = 0x40,
+ QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90,
+ QUERY_DESC_UNIT_DEF_SIZE = 0x23,
+ QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06,
+ QUERY_DESC_GEOMETRY_DEF_SIZE = 0x44,
+ QUERY_DESC_POWER_DEF_SIZE = 0x62,
};
/* Unit descriptor parameters offsets in bytes*/
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index dc6efbd1be8e..e8c26e6e6237 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -100,19 +100,6 @@
#define ufshcd_hex_dump(prefix_str, buf, len) \
print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false)
-static u32 ufs_query_desc_max_size[] = {
- QUERY_DESC_DEVICE_MAX_SIZE,
- QUERY_DESC_CONFIGURAION_MAX_SIZE,
- QUERY_DESC_UNIT_MAX_SIZE,
- QUERY_DESC_RFU_MAX_SIZE,
- QUERY_DESC_INTERCONNECT_MAX_SIZE,
- QUERY_DESC_STRING_MAX_SIZE,
- QUERY_DESC_RFU_MAX_SIZE,
- QUERY_DESC_GEOMETRY_MAX_SIZE,
- QUERY_DESC_POWER_MAX_SIZE,
- QUERY_DESC_RFU_MAX_SIZE,
-};
-
enum {
UFSHCD_MAX_CHANNEL = 0,
UFSHCD_MAX_ID = 1,
@@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
goto out;
}
- if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
+ if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n",
__func__, *buf_len);
err = -EINVAL;
@@ -2938,6 +2925,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
}
/**
+ * ufshcd_read_desc_length - read the specified descriptor length from header
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_index: descriptor index
+ * @desc_length: pointer to variable to read the length of descriptor
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+static int ufshcd_read_desc_length(struct ufs_hba *hba,
+ enum desc_idn desc_id,
+ int desc_index,
+ int *desc_length)
+{
+ int ret;
+ u8 header[QUERY_DESC_HDR_SIZE];
+ int header_len = QUERY_DESC_HDR_SIZE;
+
+ if (desc_id >= QUERY_DESC_IDN_MAX)
+ return -EINVAL;
+
+ ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+ desc_id, desc_index, 0, header,
+ &header_len);
+
+ if (ret) {
+ dev_err(hba->dev, "%s: Failed to get descriptor header id %d",
+ __func__, desc_id);
+ return ret;
+ } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) {
+ dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch",
+ __func__, header[QUERY_DESC_DESC_TYPE_OFFSET],
+ desc_id);
+ ret = -EINVAL;
+ }
+
+ *desc_length = header[QUERY_DESC_LENGTH_OFFSET];
+ return ret;
+
+}
+
+/**
+ * ufshcd_map_desc_id_to_length - map descriptor IDN to its length
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_len: mapped desc length (out)
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba,
+ enum desc_idn desc_id, int *desc_len)
+{
+ switch (desc_id) {
+ case QUERY_DESC_IDN_DEVICE:
+ *desc_len = hba->desc_size.dev_desc;
+ break;
+ case QUERY_DESC_IDN_POWER:
+ *desc_len = hba->desc_size.pwr_desc;
+ break;
+ case QUERY_DESC_IDN_GEOMETRY:
+ *desc_len = hba->desc_size.geom_desc;
+ break;
+ case QUERY_DESC_IDN_CONFIGURATION:
+ *desc_len = hba->desc_size.conf_desc;
+ break;
+ case QUERY_DESC_IDN_UNIT:
+ *desc_len = hba->desc_size.unit_desc;
+ break;
+ case QUERY_DESC_IDN_INTERCONNECT:
+ *desc_len = hba->desc_size.interc_desc;
+ break;
+ case QUERY_DESC_IDN_STRING:
+ *desc_len = QUERY_DESC_MAX_SIZE;
+ break;
+ case QUERY_DESC_IDN_RFU_0:
+ case QUERY_DESC_IDN_RFU_1:
+ *desc_len = 0;
+ break;
+ default:
+ *desc_len = 0;
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ufshcd_map_desc_id_to_length);
+
+/**
* ufshcd_read_desc_param - read the specified descriptor parameter
* @hba: Pointer to adapter instance
* @desc_id: descriptor idn value
@@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
static int ufshcd_read_desc_param(struct ufs_hba *hba,
enum desc_idn desc_id,
int desc_index,
- u32 param_offset,
+ u8 param_offset,
u8 *param_read_buf,
- u32 param_size)
+ u8 param_size)
{
int ret;
u8 *desc_buf;
- u32 buff_len;
+ int buff_len;
bool is_kmalloc = true;
- /* safety checks */
- if (desc_id >= QUERY_DESC_IDN_MAX)
+ /* Safety check */
+ if (desc_id >= QUERY_DESC_IDN_MAX || !param_size)
return -EINVAL;
- buff_len = ufs_query_desc_max_size[desc_id];
- if ((param_offset + param_size) > buff_len)
- return -EINVAL;
+ /* Get the max length of descriptor from structure filled up at probe
+ * time.
+ */
+ ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
- if (!param_offset && (param_size == buff_len)) {
- /* memory space already available to hold full descriptor */
- desc_buf = param_read_buf;
- is_kmalloc = false;
- } else {
- /* allocate memory to hold full descriptor */
+ /* Sanity checks */
+ if (ret || !buff_len) {
+ dev_err(hba->dev, "%s: Failed to get full descriptor length",
+ __func__);
+ return ret;
+ }
+
+ /* Check whether we need temp memory */
+ if (param_offset != 0 || param_size < buff_len) {
desc_buf = kmalloc(buff_len, GFP_KERNEL);
if (!desc_buf)
return -ENOMEM;
+ } else {
+ desc_buf = param_read_buf;
+ is_kmalloc = false;
}
+ /* Request for full descriptor */
ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
- desc_id, desc_index, 0, desc_buf,
- &buff_len);
+ desc_id, desc_index, 0,
+ desc_buf, &buff_len);
if (ret) {
dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
__func__, desc_id, desc_index, param_offset, ret);
-
goto out;
}
@@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
goto out;
}
- /*
- * While reading variable size descriptors (like string descriptor),
- * some UFS devices may report the "LENGTH" (field in "Transaction
- * Specific fields" of Query Response UPIU) same as what was requested
- * in Query Request UPIU instead of reporting the actual size of the
- * variable size descriptor.
- * Although it's safe to ignore the "LENGTH" field for variable size
- * descriptors as we can always derive the length of the descriptor from
- * the descriptor header fields. Hence this change impose the length
- * match check only for fixed size descriptors (for which we always
- * request the correct size as part of Query Request UPIU).
- */
- if ((desc_id != QUERY_DESC_IDN_STRING) &&
- (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
- dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
- __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
- ret = -EINVAL;
- goto out;
- }
+ /* Check whether we will not copy more data than available */
+ if (is_kmalloc && param_size > buff_len)
+ param_size = buff_len;
if (is_kmalloc)
memcpy(param_read_buf, &desc_buf[param_offset], param_size);
@@ -5919,8 +5983,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level)
static void ufshcd_init_icc_levels(struct ufs_hba *hba)
{
int ret;
- int buff_len = QUERY_DESC_POWER_MAX_SIZE;
- u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE];
+ int buff_len = hba->desc_size.pwr_desc;
+ u8 desc_buf[hba->desc_size.pwr_desc];
ret = ufshcd_read_power_desc(hba, desc_buf, buff_len);
if (ret) {
@@ -6017,11 +6081,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
{
int err;
u8 model_index;
- u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
- u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+ u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0};
+ u8 desc_buf[hba->desc_size.dev_desc];
- err = ufshcd_read_device_desc(hba, desc_buf,
- QUERY_DESC_DEVICE_MAX_SIZE);
+ err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc);
if (err) {
dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
__func__, err);
@@ -6038,14 +6101,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
- QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+ QUERY_DESC_MAX_SIZE, ASCII_STD);
if (err) {
dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
__func__, err);
goto out;
}
- str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+ str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
MAX_MODEL_LEN));
@@ -6251,6 +6314,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba)
hba->req_abort_count = 0;
}
+static void ufshcd_init_desc_sizes(struct ufs_hba *hba)
+{
+ int err;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0,
+ &hba->desc_size.dev_desc);
+ if (err)
+ hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0,
+ &hba->desc_size.pwr_desc);
+ if (err)
+ hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0,
+ &hba->desc_size.interc_desc);
+ if (err)
+ hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0,
+ &hba->desc_size.conf_desc);
+ if (err)
+ hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0,
+ &hba->desc_size.unit_desc);
+ if (err)
+ hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0,
+ &hba->desc_size.geom_desc);
+ if (err)
+ hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
+static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
+{
+ hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+ hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+ hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+ hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+ hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+ hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
/**
* ufshcd_probe_hba - probe hba to detect device and initialize
* @hba: per-adapter instance
@@ -6285,6 +6393,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
if (ret)
goto out;
+ /* Init check for device descriptor sizes */
+ ufshcd_init_desc_sizes(hba);
+
ret = ufs_get_device_desc(hba, &card);
if (ret) {
dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
@@ -6320,6 +6431,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
/* set the state as operational after switching to desired gear */
hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
/*
* If we are in error handling context or in power management callbacks
* context, no need to scan the host
@@ -7530,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev,
if (kstrtoul(buf, 0, &value))
return -EINVAL;
- if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX))
+ if (value >= UFS_PM_LVL_MAX)
return -EINVAL;
spin_lock_irqsave(hba->host->host_lock, flags);
@@ -7774,6 +7886,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
hba->mmio_base = mmio_base;
hba->irq = irq;
+ /* Set descriptor lengths to specification defaults */
+ ufshcd_def_desc_sizes(hba);
+
err = ufshcd_hba_init(hba);
if (err)
goto out_error;
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 7630600217a2..cdc8bd05f7df 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -220,6 +220,15 @@ struct ufs_dev_cmd {
struct ufs_query query;
};
+struct ufs_desc_size {
+ int dev_desc;
+ int pwr_desc;
+ int geom_desc;
+ int interc_desc;
+ int unit_desc;
+ int conf_desc;
+};
+
/**
* struct ufs_clk_info - UFS clock related info
* @list: list headed by hba->clk_list_head
@@ -483,6 +492,7 @@ struct ufs_stats {
* @clk_list_head: UFS host controller clocks list node head
* @pwr_info: holds current power mode
* @max_pwr_info: keeps the device max valid pwm
+ * @desc_size: descriptor sizes reported by device
* @urgent_bkops_lvl: keeps track of urgent bkops level for device
* @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
* device is known or not.
@@ -666,6 +676,7 @@ struct ufs_hba {
bool is_urgent_bkops_lvl_checked;
struct rw_semaphore clk_scaling_lock;
+ struct ufs_desc_size desc_size;
};
/* Returns true if clocks can be gated. Otherwise false */
@@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
enum flag_idn idn, bool *flag_res);
int ufshcd_hold(struct ufs_hba *hba, bool async);
void ufshcd_release(struct ufs_hba *hba);
+
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id,
+ int *desc_length);
+
u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
/* Wrapper functions for safely calling variant operations */
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index ef474a748744..c374e3b5c678 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
irq_flag &= ~PCI_IRQ_MSI;
error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
- if (error)
+ if (error < 0)
goto out_reset_adapter;
adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index b7b87ecefcdf..9fca8d225ee0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
newsock->ops = sock->ops;
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
if (rc == -EAGAIN) {
/* Nothing ready, so wait for activity */
init_waitqueue_entry(&wait, current);
@@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
set_current_state(TASK_INTERRUPTIBLE);
schedule();
remove_wait_queue(sk_sleep(sock->sk), &wait);
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
}
if (rc)
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index f5e330099bfc..fd7c16a7ca6e 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -43,7 +43,7 @@
#include "target_core_ua.h"
static sense_reason_t core_alua_check_transition(int state, int valid,
- int *primary);
+ int *primary, int explicit);
static int core_alua_set_tg_pt_secondary_state(
struct se_lun *lun, int explicit, int offline);
@@ -335,8 +335,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
* the state is a primary or secondary target port asymmetric
* access state.
*/
- rc = core_alua_check_transition(alua_access_state,
- valid_states, &primary);
+ rc = core_alua_check_transition(alua_access_state, valid_states,
+ &primary, 1);
if (rc) {
/*
* If the SET TARGET PORT GROUPS attempts to establish
@@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd)
if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
return 0;
- if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+ if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
return 0;
/*
@@ -762,7 +762,7 @@ target_alua_state_check(struct se_cmd *cmd)
* Check implicit and explicit ALUA state change request.
*/
static sense_reason_t
-core_alua_check_transition(int state, int valid, int *primary)
+core_alua_check_transition(int state, int valid, int *primary, int explicit)
{
/*
* OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are
@@ -804,11 +804,14 @@ core_alua_check_transition(int state, int valid, int *primary)
*primary = 0;
break;
case ALUA_ACCESS_STATE_TRANSITION:
- /*
- * Transitioning is set internally, and
- * cannot be selected manually.
- */
- goto not_supported;
+ if (!(valid & ALUA_T_SUP) || explicit)
+ /*
+ * Transitioning is set internally and by tcmu daemon,
+ * and cannot be selected through a STPG.
+ */
+ goto not_supported;
+ *primary = 0;
+ break;
default:
pr_err("Unknown ALUA access state: 0x%02x\n", state);
return TCM_INVALID_PARAMETER_LIST;
@@ -1013,7 +1016,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
{
struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
- struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work);
+ struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
@@ -1070,32 +1073,19 @@ static int core_alua_do_transition_tg_pt(
if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
return 0;
- if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+ if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
return -EAGAIN;
/*
* Flush any pending transitions
*/
- if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs &&
- atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
- ALUA_ACCESS_STATE_TRANSITION) {
- /* Just in case */
- tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
- tg_pt_gp->tg_pt_gp_transition_complete = &wait;
- flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
- wait_for_completion(&wait);
- tg_pt_gp->tg_pt_gp_transition_complete = NULL;
- return 0;
- }
+ if (!explicit)
+ flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Save the old primary ALUA access state, and set the current state
* to ALUA_ACCESS_STATE_TRANSITION.
*/
- tg_pt_gp->tg_pt_gp_alua_previous_state =
- atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
- tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
ALUA_ACCESS_STATE_TRANSITION);
tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
@@ -1104,6 +1094,13 @@ static int core_alua_do_transition_tg_pt(
core_alua_queue_state_change_ua(tg_pt_gp);
+ if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+ return 0;
+
+ tg_pt_gp->tg_pt_gp_alua_previous_state =
+ atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+ tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
+
/*
* Check for the optional ALUA primary state transition delay
*/
@@ -1117,17 +1114,9 @@ static int core_alua_do_transition_tg_pt(
atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
- if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
- unsigned long transition_tmo;
-
- transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
- queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
- &tg_pt_gp->tg_pt_gp_transition_work,
- transition_tmo);
- } else {
+ schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
+ if (explicit) {
tg_pt_gp->tg_pt_gp_transition_complete = &wait;
- queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
- &tg_pt_gp->tg_pt_gp_transition_work, 0);
wait_for_completion(&wait);
tg_pt_gp->tg_pt_gp_transition_complete = NULL;
}
@@ -1149,8 +1138,12 @@ int core_alua_do_port_transition(
struct t10_alua_tg_pt_gp *tg_pt_gp;
int primary, valid_states, rc = 0;
+ if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
+ return -ENODEV;
+
valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states;
- if (core_alua_check_transition(new_state, valid_states, &primary) != 0)
+ if (core_alua_check_transition(new_state, valid_states, &primary,
+ explicit) != 0)
return -EINVAL;
local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem;
@@ -1695,8 +1688,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
- INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
- core_alua_do_transition_tg_pt_work);
+ INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
+ core_alua_do_transition_tg_pt_work);
tg_pt_gp->tg_pt_gp_dev = dev;
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
@@ -1804,7 +1797,7 @@ void core_alua_free_tg_pt_gp(
dev->t10_alua.alua_tg_pt_gps_counter--;
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
- flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
+ flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Allow a struct t10_alua_tg_pt_gp_member * referenced by
@@ -1973,7 +1966,7 @@ ssize_t core_alua_store_tg_pt_gp_info(
unsigned char buf[TG_PT_GROUP_NAME_BUF];
int move = 0;
- if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+ if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
return -ENODEV;
@@ -2230,7 +2223,7 @@ ssize_t core_alua_store_offline_bit(
unsigned long tmp;
int ret;
- if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+ if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
return -ENODEV;
@@ -2316,7 +2309,8 @@ ssize_t core_alua_store_secondary_write_metadata(
int core_setup_alua(struct se_device *dev)
{
- if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+ if (!(dev->transport->transport_flags &
+ TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
struct t10_alua_lu_gp_member *lu_gp_mem;
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 54b36c9835be..38b5025e4c7a 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
pr_err("Missing tfo->aborted_task()\n");
return -EINVAL;
}
+ if (!tfo->check_stop_free) {
+ pr_err("Missing tfo->check_stop_free()\n");
+ return -EINVAL;
+ }
/*
* We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn()
* tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index a8f8e53f2f57..94cda7991e80 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
buf = kzalloc(12, GFP_KERNEL);
if (!buf)
- return;
+ goto out_free;
memset(cdb, 0, MAX_COMMAND_SIZE);
cdb[0] = MODE_SENSE;
@@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
* If MODE_SENSE still returns zero, set the default value to 1024.
*/
sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]);
+out_free:
if (!sdev->sector_size)
sdev->sector_size = 1024;
-out_free:
+
kfree(buf);
}
@@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
sd->lun, sd->queue_depth);
}
- dev->dev_attrib.hw_block_size = sd->sector_size;
+ dev->dev_attrib.hw_block_size =
+ min_not_zero((int)sd->sector_size, 512);
dev->dev_attrib.hw_max_sectors =
- min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q));
+ min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q));
dev->dev_attrib.hw_queue_depth = sd->queue_depth;
/*
@@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
/*
* For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE.
*/
- if (sd->type == TYPE_TAPE)
+ if (sd->type == TYPE_TAPE) {
pscsi_tape_read_blocksize(dev, sd);
+ dev->dev_attrib.hw_block_size = sd->sector_size;
+ }
return 0;
}
@@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
/*
* Called with struct Scsi_Host->host_lock called.
*/
-static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
+static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd)
__releases(sh->host_lock)
{
struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
@@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
return 0;
}
-/*
- * Called with struct Scsi_Host->host_lock called.
- */
-static int pscsi_create_type_other(struct se_device *dev,
- struct scsi_device *sd)
- __releases(sh->host_lock)
-{
- struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
- struct Scsi_Host *sh = sd->host;
- int ret;
-
- spin_unlock_irq(sh->host_lock);
- ret = pscsi_add_device_to_list(dev, sd);
- if (ret)
- return ret;
-
- pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n",
- phv->phv_host_id, scsi_device_type(sd->type), sh->host_no,
- sd->channel, sd->id, sd->lun);
- return 0;
-}
-
static int pscsi_configure_device(struct se_device *dev)
{
struct se_hba *hba = dev->se_hba;
@@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev)
case TYPE_DISK:
ret = pscsi_create_type_disk(dev, sd);
break;
- case TYPE_ROM:
- ret = pscsi_create_type_rom(dev, sd);
- break;
default:
- ret = pscsi_create_type_other(dev, sd);
+ ret = pscsi_create_type_nondisk(dev, sd);
break;
}
@@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev)
else if (pdv->pdv_lld_host)
scsi_host_put(pdv->pdv_lld_host);
- if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM))
- scsi_device_put(sd);
+ scsi_device_put(sd);
pdv->pdv_sd = NULL;
}
@@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
if (pdv->pdv_bd && pdv->pdv_bd->bd_part)
return pdv->pdv_bd->bd_part->nr_sects;
- dump_stack();
return 0;
}
@@ -1103,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate)
static const struct target_backend_ops pscsi_ops = {
.name = "pscsi",
.owner = THIS_MODULE,
- .transport_flags = TRANSPORT_FLAG_PASSTHROUGH,
+ .transport_flags = TRANSPORT_FLAG_PASSTHROUGH |
+ TRANSPORT_FLAG_PASSTHROUGH_ALUA,
.attach_hba = pscsi_attach_hba,
.detach_hba = pscsi_detach_hba,
.pmode_enable_hba = pscsi_pmode_enable_hba,
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 68d8aef7ab78..c194063f169b 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
return ret;
break;
case VERIFY:
+ case VERIFY_16:
size = 0;
- sectors = transport_get_sectors_10(cdb);
- cmd->t_task_lba = transport_lba_32(cdb);
+ if (cdb[0] == VERIFY) {
+ sectors = transport_get_sectors_10(cdb);
+ cmd->t_task_lba = transport_lba_32(cdb);
+ } else {
+ sectors = transport_get_sectors_16(cdb);
+ cmd->t_task_lba = transport_lba_64(cdb);
+ }
cmd->execute_cmd = sbc_emulate_noop;
goto check_lba;
case REZERO_UNIT:
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index c0dbfa016575..6fb191914f45 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -602,7 +602,8 @@ int core_tpg_add_lun(
if (ret)
goto out_kill_ref;
- if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+ if (!(dev->transport->transport_flags &
+ TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 434d9d693989..b1a3cdb29468 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -636,8 +636,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
* Fabric modules are expected to return '1' here if the se_cmd being
* passed is released at this point, or zero if not being released.
*/
- return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd)
- : 0;
+ return cmd->se_tfo->check_stop_free(cmd);
}
static void transport_lun_remove_cmd(struct se_cmd *cmd)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index c3adefe95e50..c6874c38a10b 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -28,6 +28,7 @@
#include <linux/stringify.h>
#include <linux/bitops.h>
#include <linux/highmem.h>
+#include <linux/configfs.h>
#include <net/genetlink.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_proto.h>
@@ -112,6 +113,7 @@ struct tcmu_dev {
spinlock_t commands_lock;
struct timer_list timeout;
+ unsigned int cmd_time_out;
char dev_config[TCMU_CONFIG_LEN];
};
@@ -172,7 +174,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
tcmu_cmd->se_cmd = se_cmd;
tcmu_cmd->tcmu_dev = udev;
- tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
+ if (udev->cmd_time_out)
+ tcmu_cmd->deadline = jiffies +
+ msecs_to_jiffies(udev->cmd_time_out);
idr_preload(GFP_KERNEL);
spin_lock_irq(&udev->commands_lock);
@@ -451,7 +455,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
pr_debug("sleeping for ring space\n");
spin_unlock_irq(&udev->cmdr_lock);
- ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
+ if (udev->cmd_time_out)
+ ret = schedule_timeout(
+ msecs_to_jiffies(udev->cmd_time_out));
+ else
+ ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
finish_wait(&udev->wait_cmdr, &__wait);
if (!ret) {
pr_warn("tcmu: command timed out\n");
@@ -526,8 +534,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
/* TODO: only if FLUSH and FUA? */
uio_event_notify(&udev->uio_info);
- mod_timer(&udev->timeout,
- round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
+ if (udev->cmd_time_out)
+ mod_timer(&udev->timeout, round_jiffies_up(jiffies +
+ msecs_to_jiffies(udev->cmd_time_out)));
return TCM_NO_SENSE;
}
@@ -742,6 +751,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
}
udev->hba = hba;
+ udev->cmd_time_out = TCMU_TIME_OUT;
init_waitqueue_head(&udev->wait_cmdr);
spin_lock_init(&udev->cmdr_lock);
@@ -960,7 +970,8 @@ static int tcmu_configure_device(struct se_device *dev)
if (dev->dev_attrib.hw_block_size == 0)
dev->dev_attrib.hw_block_size = 512;
/* Other attributes can be configured in userspace */
- dev->dev_attrib.hw_max_sectors = 128;
+ if (!dev->dev_attrib.hw_max_sectors)
+ dev->dev_attrib.hw_max_sectors = 128;
dev->dev_attrib.hw_queue_depth = 128;
ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
@@ -997,6 +1008,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
kfree(udev);
}
+static bool tcmu_dev_configured(struct tcmu_dev *udev)
+{ /*
+ * Returns true when udev->uio_info.uio_dev is non-NULL, false otherwise.
+ * tcmu_free_device() uses this as its "device was configured" test before
+ * sending TCMU_CMD_REMOVED_DEVICE.
+ */
+ return udev->uio_info.uio_dev ? true : false;
+}
+
static void tcmu_free_device(struct se_device *dev)
{
struct tcmu_dev *udev = TCMU_DEV(dev);
@@ -1018,8 +1034,7 @@ static void tcmu_free_device(struct se_device *dev)
spin_unlock_irq(&udev->commands_lock);
WARN_ON(!all_expired);
- /* Device was configured */
- if (udev->uio_info.uio_dev) {
+ if (tcmu_dev_configured(udev)) {
tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
udev->uio_info.uio_dev->minor);
@@ -1031,16 +1046,42 @@ static void tcmu_free_device(struct se_device *dev)
}
enum {
- Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err,
+ Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
+ Opt_err,
};
static match_table_t tokens = {
{Opt_dev_config, "dev_config=%s"},
{Opt_dev_size, "dev_size=%u"},
{Opt_hw_block_size, "hw_block_size=%u"},
+ {Opt_hw_max_sectors, "hw_max_sectors=%u"},
{Opt_err, NULL}
};
+static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib)
+{ /*
+ * Shared parser for numeric configfs options (used for hw_block_size=
+ * and hw_max_sectors= in tcmu_set_configfs_dev_params()).
+ *
+ * @arg: matched option substring holding the number
+ * @dev_attrib: destination for the parsed value
+ *
+ * Returns 0 on success, -ENOMEM if the substring cannot be duplicated,
+ * the kstrtoul() error code if parsing fails, or -EINVAL if the parsed
+ * value is zero. *dev_attrib is only written on success.
+ */
+ unsigned long tmp_ul;
+ char *arg_p;
+ int ret;
+
+ arg_p = match_strdup(arg);
+ if (!arg_p)
+ return -ENOMEM;
+
+ ret = kstrtoul(arg_p, 0, &tmp_ul);
+ kfree(arg_p); /* the duplicate is only needed for the parse above */
+ if (ret < 0) {
+ pr_err("kstrtoul() failed for dev attrib\n");
+ return ret;
+ }
+ if (!tmp_ul) {
+ pr_err("dev attrib must be nonzero\n");
+ return -EINVAL;
+ }
+ *dev_attrib = tmp_ul; /* NOTE(review): unsigned long narrowed to u32 with no range check */
+ return 0;
+}
+
static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
const char *page, ssize_t count)
{
@@ -1048,7 +1089,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
char *orig, *ptr, *opts, *arg_p;
substring_t args[MAX_OPT_ARGS];
int ret = 0, token;
- unsigned long tmp_ul;
opts = kstrdup(page, GFP_KERNEL);
if (!opts)
@@ -1082,26 +1122,19 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
pr_err("kstrtoul() failed for dev_size=\n");
break;
case Opt_hw_block_size:
- arg_p = match_strdup(&args[0]);
- if (!arg_p) {
- ret = -ENOMEM;
- break;
- }
- ret = kstrtoul(arg_p, 0, &tmp_ul);
- kfree(arg_p);
- if (ret < 0) {
- pr_err("kstrtoul() failed for hw_block_size=\n");
- break;
- }
- if (!tmp_ul) {
- pr_err("hw_block_size must be nonzero\n");
- break;
- }
- dev->dev_attrib.hw_block_size = tmp_ul;
+ ret = tcmu_set_dev_attrib(&args[0],
+ &(dev->dev_attrib.hw_block_size));
+ break;
+ case Opt_hw_max_sectors:
+ ret = tcmu_set_dev_attrib(&args[0],
+ &(dev->dev_attrib.hw_max_sectors));
break;
default:
break;
}
+
+ if (ret)
+ break;
}
kfree(orig);
@@ -1134,7 +1167,48 @@ tcmu_parse_cdb(struct se_cmd *cmd)
return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
}
-static const struct target_backend_ops tcmu_ops = {
+static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
+{ /*
+ * configfs "show" handler for the cmd_time_out attribute.
+ * Resolves @item to its se_dev_attrib, then to the owning tcmu_dev, and
+ * prints udev->cmd_time_out divided by MSEC_PER_SEC into @page. The
+ * field is fed to msecs_to_jiffies() elsewhere in this file, so it is
+ * held in milliseconds and reported here in whole seconds.
+ * Returns the snprintf() result.
+ */
+ struct se_dev_attrib *da = container_of(to_config_group(item),
+ struct se_dev_attrib, da_group);
+ struct tcmu_dev *udev = container_of(da->da_dev,
+ struct tcmu_dev, se_dev);
+
+ return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
+}
+
+static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
+ size_t count)
+{ /*
+ * configfs "store" handler for the cmd_time_out attribute.
+ * Parses @page as a u32 and stores it, multiplied by MSEC_PER_SEC, in
+ * udev->cmd_time_out.
+ *
+ * Returns @count on success, -EINVAL if the device still has exports
+ * or the value is zero, or the kstrtou32() error code if parsing fails.
+ */
+ struct se_dev_attrib *da = container_of(to_config_group(item),
+ struct se_dev_attrib, da_group);
+ struct tcmu_dev *udev = container_of(da->da_dev,
+ struct tcmu_dev, se_dev);
+ u32 val;
+ int ret;
+
+ if (da->da_dev->export_count) { /* refuse changes while export_count is nonzero */
+ pr_err("Unable to set tcmu cmd_time_out while exports exist\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtou32(page, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ if (!val) { /* NOTE(review): zero is rejected here, yet other code in this file tests for cmd_time_out == 0 - confirm intent */
+ pr_err("Illegal value for cmd_time_out\n");
+ return -EINVAL;
+ }
+
+ udev->cmd_time_out = val * MSEC_PER_SEC; /* NOTE(review): product may not fit the unsigned int field for large val - confirm */
+ return count;
+}
+CONFIGFS_ATTR(tcmu_, cmd_time_out); /* presumably defines tcmu_attr_cmd_time_out, which tcmu_module_init() appends to tcmu_attrs */
+
+static struct configfs_attribute **tcmu_attrs;
+
+static struct target_backend_ops tcmu_ops = {
.name = "user",
.owner = THIS_MODULE,
.transport_flags = TRANSPORT_FLAG_PASSTHROUGH,
@@ -1148,12 +1222,12 @@ static const struct target_backend_ops tcmu_ops = {
.show_configfs_dev_params = tcmu_show_configfs_dev_params,
.get_device_type = sbc_get_device_type,
.get_blocks = tcmu_get_blocks,
- .tb_dev_attrib_attrs = passthrough_attrib_attrs,
+ .tb_dev_attrib_attrs = NULL,
};
static int __init tcmu_module_init(void)
{
- int ret;
+ int ret, i, len = 0;
BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
@@ -1175,12 +1249,31 @@ static int __init tcmu_module_init(void)
goto out_unreg_device;
}
+ for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+ len += sizeof(struct configfs_attribute *);
+ }
+ len += sizeof(struct configfs_attribute *) * 2;
+
+ tcmu_attrs = kzalloc(len, GFP_KERNEL);
+ if (!tcmu_attrs) {
+ ret = -ENOMEM;
+ goto out_unreg_genl;
+ }
+
+ for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+ tcmu_attrs[i] = passthrough_attrib_attrs[i];
+ }
+ tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
+ tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
+
ret = transport_backend_register(&tcmu_ops);
if (ret)
- goto out_unreg_genl;
+ goto out_attrs;
return 0;
+out_attrs:
+ kfree(tcmu_attrs);
out_unreg_genl:
genl_unregister_family(&tcmu_genl_family);
out_unreg_device:
@@ -1194,6 +1287,7 @@ out_free_cache:
static void __exit tcmu_module_exit(void)
{
target_backend_unregister(&tcmu_ops);
+ kfree(tcmu_attrs);
genl_unregister_family(&tcmu_genl_family);
root_device_unregister(tcmu_root_device);
kmem_cache_destroy(tcmu_cmd_cache);
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index bcf1d33e6ffe..c334bcc59c64 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios,
pinctrl_select_state(ascport->pinctrl,
ascport->states[NO_HW_FLOWCTRL]);
- gpiod = devm_get_gpiod_from_child(port->dev, "rts",
- &np->fwnode);
- if (!IS_ERR(gpiod)) {
- gpiod_direction_output(gpiod, 0);
+ gpiod = devm_fwnode_get_gpiod_from_child(port->dev,
+ "rts",
+ &np->fwnode,
+ GPIOD_OUT_LOW,
+ np->name);
+ if (!IS_ERR(gpiod))
ascport->rts = gpiod;
- }
}
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index c77a0751a311..f3bf8f4e2d6c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -36,6 +36,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/refcount.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -86,7 +87,7 @@ struct grant_map {
int index;
int count;
int flags;
- atomic_t users;
+ refcount_t users;
struct unmap_notify notify;
struct ioctl_gntdev_grant_ref *grants;
struct gnttab_map_grant_ref *map_ops;
@@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
add->index = 0;
add->count = count;
- atomic_set(&add->users, 1);
+ refcount_set(&add->users, 1);
return add;
@@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
if (!map)
return;
- if (!atomic_dec_and_test(&map->users))
+ if (!refcount_dec_and_test(&map->users))
return;
atomic_sub(map->count, &pages_mapped);
@@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma)
struct grant_map *map = vma->vm_private_data;
pr_debug("gntdev_vma_open %p\n", vma);
- atomic_inc(&map->users);
+ refcount_inc(&map->users);
}
static void gntdev_vma_close(struct vm_area_struct *vma)
@@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
}
- atomic_inc(&map->users);
+ refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index b29447e03ede..25d404d22cae 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
{
struct afs_server *server;
struct afs_vnode *vnode, *xvnode;
- time_t now;
+ time64_t now;
long timeout;
int ret;
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find the first vnode to update */
spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
/* and then reschedule */
_debug("reschedule");
- vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+ vnode->update_at = ktime_get_real_seconds() +
+ afs_vnode_update_timeout;
spin_lock(&server->cb_lock);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 2edbdcbf6432..3062cceb5c2a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
struct afs_callback *cb;
struct afs_server *server;
__be32 *bp;
- u32 tmp;
int ret, loop;
_enter("{%u}", call->unmarshall);
@@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
if (ret < 0)
return ret;
- tmp = ntohl(call->tmp);
- _debug("CB count: %u", tmp);
- if (tmp != call->count && tmp != 0)
+ call->count2 = ntohl(call->tmp);
+ _debug("CB count: %u", call->count2);
+ if (call->count2 != call->count && call->count2 != 0)
return -EBADMSG;
call->offset = 0;
call->unmarshall++;
@@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call)
case 4:
_debug("extract CB array");
ret = afs_extract_data(call, call->buffer,
- call->count * 3 * 4, false);
+ call->count2 * 3 * 4, false);
if (ret < 0)
return ret;
_debug("unmarshall CB array");
cb = call->request;
bp = call->buffer;
- for (loop = call->count; loop > 0; loop--, cb++) {
+ for (loop = call->count2; loop > 0; loop--, cb++) {
cb->version = ntohl(*bp++);
cb->expiry = ntohl(*bp++);
cb->type = ntohl(*bp++);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ba7b71fba34b..0d5b8508869b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
const struct file_operations afs_file_operations = {
.open = afs_open,
+ .flush = afs_flush,
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -184,10 +185,13 @@ int afs_page_filler(void *data, struct page *page)
if (!req)
goto enomem;
+ /* We request a full page. If the page is a partial one at the
+ * end of the file, the server will return a short read and the
+ * unmarshalling code will clear the unfilled space.
+ */
atomic_set(&req->usage, 1);
req->pos = (loff_t)page->index << PAGE_SHIFT;
- req->len = min_t(size_t, i_size_read(inode) - req->pos,
- PAGE_SIZE);
+ req->len = PAGE_SIZE;
req->nr_pages = 1;
req->pages[0] = page;
get_page(page);
@@ -208,7 +212,13 @@ int afs_page_filler(void *data, struct page *page)
fscache_uncache_page(vnode->cache, page);
#endif
BUG_ON(PageFsCache(page));
- goto error;
+
+ if (ret == -EINTR ||
+ ret == -ENOMEM ||
+ ret == -ERESTARTSYS ||
+ ret == -EAGAIN)
+ goto error;
+ goto io_error;
}
SetPageUptodate(page);
@@ -227,10 +237,12 @@ int afs_page_filler(void *data, struct page *page)
_leave(" = 0");
return 0;
+io_error:
+ SetPageError(page);
+ goto error;
enomem:
ret = -ENOMEM;
error:
- SetPageError(page);
unlock_page(page);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ac8e766978dc..19f76ae36982 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -17,6 +17,12 @@
#include "afs_fs.h"
/*
+ * We need somewhere to discard into in case the server helpfully returns more
+ * than we asked for in FS.FetchData{,64}.
+ */
+static u8 afs_discard_buffer[64];
+
+/*
* decode an AFSFid block
*/
static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
@@ -105,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_mode = mode;
}
- vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+ vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_version = data_version;
@@ -139,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
vnode->cb_version = ntohl(*bp++);
vnode->cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds();
*_bp = bp;
}
@@ -315,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
void *buffer;
int ret;
- _enter("{%u,%zu/%u;%u/%llu}",
+ _enter("{%u,%zu/%u;%llu/%llu}",
call->unmarshall, call->offset, call->count,
req->remain, req->actual_len);
@@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
req->actual_len |= ntohl(call->tmp);
_debug("DATA length: %llu", req->actual_len);
- /* Check that the server didn't want to send us extra. We
- * might want to just discard instead, but that requires
- * cooperation from AF_RXRPC.
- */
- if (req->actual_len > req->len)
- return -EBADMSG;
req->remain = req->actual_len;
call->offset = req->pos & (PAGE_SIZE - 1);
@@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
call->unmarshall++;
begin_page:
+ ASSERTCMP(req->index, <, req->nr_pages);
if (req->remain > PAGE_SIZE - call->offset)
size = PAGE_SIZE - call->offset;
else
@@ -378,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
/* extract the returned data */
case 3:
- _debug("extract data %u/%llu %zu/%u",
+ _debug("extract data %llu/%llu %zu/%u",
req->remain, req->actual_len, call->offset, call->count);
buffer = kmap(req->pages[req->index]);
@@ -389,19 +390,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
if (call->offset == PAGE_SIZE) {
if (req->page_done)
req->page_done(call, req);
+ req->index++;
if (req->remain > 0) {
- req->index++;
call->offset = 0;
+ if (req->index >= req->nr_pages) {
+ call->unmarshall = 4;
+ goto begin_discard;
+ }
goto begin_page;
}
}
+ goto no_more_data;
+
+ /* Discard any excess data the server gave us */
+ begin_discard:
+ case 4:
+ size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
+ call->count = size;
+ _debug("extract discard %llu/%llu %zu/%u",
+ req->remain, req->actual_len, call->offset, call->count);
+
+ call->offset = 0;
+ ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
+ req->remain -= call->offset;
+ if (ret < 0)
+ return ret;
+ if (req->remain > 0)
+ goto begin_discard;
no_more_data:
call->offset = 0;
- call->unmarshall++;
+ call->unmarshall = 5;
/* extract the metadata */
- case 4:
+ case 5:
ret = afs_extract_data(call, call->buffer,
(21 + 3 + 6) * 4, false);
if (ret < 0)
@@ -416,16 +438,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
call->offset = 0;
call->unmarshall++;
- case 5:
+ case 6:
break;
}
- if (call->count < PAGE_SIZE) {
- buffer = kmap(req->pages[req->index]);
- memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap(req->pages[req->index]);
+ for (; req->index < req->nr_pages; req->index++) {
+ if (call->count < PAGE_SIZE)
+ zero_user_segment(req->pages[req->index],
+ call->count, PAGE_SIZE);
if (req->page_done)
req->page_done(call, req);
+ call->count = 0;
}
_leave(" = 0 [done]");
@@ -711,8 +734,8 @@ int afs_fs_create(struct afs_server *server,
memset(bp, 0, padsz);
bp = (void *) bp + padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -980,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server,
memset(bp, 0, c_padsz);
bp = (void *) bp + c_padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1180,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
@@ -1213,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
_enter(",%x,{%x:%u},,",
key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
- size = to - offset;
+ size = (loff_t)to - (loff_t)offset;
if (first != last)
size += (loff_t)(last - first) << PAGE_SHIFT;
pos = (loff_t)first << PAGE_SHIFT;
@@ -1257,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1e4897a048d2..aae55dd15108 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_fop = &afs_dir_file_operations;
break;
case AFS_FTYPE_SYMLINK:
- inode->i_mode = S_IFLNK | vnode->status.mode;
- inode->i_op = &page_symlink_inode_operations;
+ /* Symlinks with a mode of 0644 are actually mountpoints. */
+ if ((vnode->status.mode & 0777) == 0644) {
+ inode->i_flags |= S_AUTOMOUNT;
+
+ spin_lock(&vnode->lock);
+ set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ spin_unlock(&vnode->lock);
+
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_mntpt_inode_operations;
+ inode->i_fop = &afs_mntpt_file_operations;
+ } else {
+ inode->i_mode = S_IFLNK | vnode->status.mode;
+ inode->i_op = &page_symlink_inode_operations;
+ }
inode_nohighmem(inode);
break;
default:
@@ -70,27 +83,15 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
- inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_gid = vnode->status.group;
inode->i_size = vnode->status.size;
- inode->i_ctime.tv_sec = vnode->status.mtime_server;
+ inode->i_ctime.tv_sec = vnode->status.mtime_client;
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
inode->i_generation = vnode->fid.unique;
inode->i_version = vnode->status.data_version;
inode->i_mapping->a_ops = &afs_fs_aops;
-
- /* check to see whether a symbolic link is really a mountpoint */
- if (vnode->status.type == AFS_FTYPE_SYMLINK) {
- afs_mntpt_check_symlink(vnode, key);
-
- if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
- inode->i_mode = S_IFDIR | vnode->status.mode;
- inode->i_op = &afs_mntpt_inode_operations;
- inode->i_fop = &afs_mntpt_file_operations;
- }
- }
-
return 0;
}
@@ -245,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
vnode->cb_version = 0;
vnode->cb_expiry = 0;
vnode->cb_type = 0;
- vnode->cb_expires = get_seconds();
+ vnode->cb_expires = ktime_get_real_seconds();
} else {
vnode->cb_version = cb->version;
vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry +
+ ktime_get_real_seconds();
}
}
@@ -323,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
!test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- if (vnode->cb_expires < get_seconds() + 10) {
+ if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
_debug("callback expired");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
} else {
@@ -444,7 +446,7 @@ void afs_evict_inode(struct inode *inode)
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
- rcu_assign_pointer(vnode->permits, NULL);
+ RCU_INIT_POINTER(vnode->permits, NULL);
mutex_unlock(&vnode->permits_lock);
if (permits)
call_rcu(&permits->rcu, afs_zap_permits);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5dfa56903a2d..a6901360fb81 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/ktime.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/rxrpc.h>
@@ -90,7 +91,10 @@ struct afs_call {
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
- unsigned last_to; /* amount of mapping[last] */
+ union {
+ unsigned last_to; /* amount of mapping[last] */
+ unsigned count2; /* count used in unmarshalling */
+ };
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
@@ -127,12 +131,11 @@ struct afs_call_type {
*/
struct afs_read {
loff_t pos; /* Where to start reading */
- loff_t len; /* How much to read */
+ loff_t len; /* How much we're asking for */
loff_t actual_len; /* How much we're actually getting */
+ loff_t remain; /* Amount remaining */
atomic_t usage;
- unsigned int remain; /* Amount remaining */
unsigned int index; /* Which page we're reading into */
- unsigned int pg_offset; /* Offset in page we're at */
unsigned int nr_pages;
void (*page_done)(struct afs_call *, struct afs_read *);
struct page *pages[];
@@ -247,7 +250,7 @@ struct afs_cache_vhash {
*/
struct afs_vlocation {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct list_head link; /* link in cell volume location list */
struct list_head grave; /* link in master graveyard list */
struct list_head update; /* link in master update list */
@@ -258,7 +261,7 @@ struct afs_vlocation {
struct afs_cache_vlocation vldb; /* volume information DB record */
struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
wait_queue_head_t waitq; /* status change waitqueue */
- time_t update_at; /* time at which record should be updated */
+ time64_t update_at; /* time at which record should be updated */
spinlock_t lock; /* access lock */
afs_vlocation_state_t state; /* volume location state */
unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
@@ -271,7 +274,7 @@ struct afs_vlocation {
*/
struct afs_server {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct in_addr addr; /* server address */
struct afs_cell *cell; /* cell in which server resides */
struct list_head link; /* link in cell's server list */
@@ -374,8 +377,8 @@ struct afs_vnode {
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
struct work_struct cb_broken_work; /* work to be done on callback break */
- time_t cb_expires; /* time at which callback expires */
- time_t cb_expires_at; /* time used to order cb_promise */
+ time64_t cb_expires; /* time at which callback expires */
+ time64_t cb_expires_at; /* time used to order cb_promise */
unsigned cb_version; /* callback version */
unsigned cb_expiry; /* callback expiry time */
afs_callback_type_t cb_type; /* type of callback */
@@ -557,7 +560,6 @@ extern const struct inode_operations afs_autocell_inode_operations;
extern const struct file_operations afs_mntpt_file_operations;
extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
extern void afs_mntpt_kill_timer(void);
/*
@@ -718,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 91ea1aa0d8b3..100b207efc9e 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code)
case RXKADDATALEN: return -EKEYREJECTED;
case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+ case RXGEN_OPCODE: return -ENOTSUPP;
+
default: return -EREMOTEIO;
}
}
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index d4fb0afc0097..bd3b65cde282 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -47,59 +47,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
/*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
- struct page *page;
- size_t size;
- char *buf;
- int ret;
-
- _enter("{%x:%u,%u}",
- vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
- /* read the contents of the symlink into the pagecache */
- page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
- afs_page_filler, key);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto out;
- }
-
- ret = -EIO;
- if (PageError(page))
- goto out_free;
-
- buf = kmap(page);
-
- /* examine the symlink's contents */
- size = vnode->status.size;
- _debug("symlink to %*.*s", (int) size, (int) size, buf);
-
- if (size > 2 &&
- (buf[0] == '%' || buf[0] == '#') &&
- buf[size - 1] == '.'
- ) {
- _debug("symlink is a mountpoint");
- spin_lock(&vnode->lock);
- set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
- vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
- spin_unlock(&vnode->lock);
- }
-
- ret = 0;
-
- kunmap(page);
-out_free:
- put_page(page);
-out:
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* no valid lookup procedure on this sort of dir
*/
static struct dentry *afs_mntpt_lookup(struct inode *dir,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 419ef05dcb5e..8f76b13d5549 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call)
call->buffer = NULL;
}
+#define AFS_BVEC_MAX 8
+
+/*
+ * Load the given bvec with the next few pages.
+ */
+static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
+ struct bio_vec *bv, pgoff_t first, pgoff_t last,
+ unsigned offset)
+{
+ struct page *pages[AFS_BVEC_MAX];
+ unsigned int nr, n, i, to, bytes = 0;
+
+ nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
+ n = find_get_pages_contig(call->mapping, first, nr, pages);
+ ASSERTCMP(n, ==, nr);
+
+ msg->msg_flags |= MSG_MORE;
+ for (i = 0; i < nr; i++) {
+ to = PAGE_SIZE;
+ if (first + i >= last) {
+ to = call->last_to;
+ msg->msg_flags &= ~MSG_MORE;
+ }
+ bv[i].bv_page = pages[i];
+ bv[i].bv_len = to - offset;
+ bv[i].bv_offset = offset;
+ bytes += to - offset;
+ offset = 0;
+ }
+
+ iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+}
+
/*
* attach the data from a bunch of pages on an inode to a call
*/
static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
{
- struct page *pages[8];
- unsigned count, n, loop, offset, to;
+ struct bio_vec bv[AFS_BVEC_MAX];
+ unsigned int bytes, nr, loop, offset;
pgoff_t first = call->first, last = call->last;
int ret;
- _enter("");
-
offset = call->first_offset;
call->first_offset = 0;
do {
- _debug("attach %lx-%lx", first, last);
-
- count = last - first + 1;
- if (count > ARRAY_SIZE(pages))
- count = ARRAY_SIZE(pages);
- n = find_get_pages_contig(call->mapping, first, count, pages);
- ASSERTCMP(n, ==, count);
-
- loop = 0;
- do {
- struct bio_vec bvec = {.bv_page = pages[loop],
- .bv_offset = offset};
- msg->msg_flags = 0;
- to = PAGE_SIZE;
- if (first + loop >= last)
- to = call->last_to;
- else
- msg->msg_flags = MSG_MORE;
- bvec.bv_len = to - offset;
- offset = 0;
-
- _debug("- range %u-%u%s",
- offset, to, msg->msg_flags ? " [more]" : "");
- iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
- &bvec, 1, to - offset);
-
- /* have to change the state *before* sending the last
- * packet as RxRPC might give us the reply before it
- * returns from sending the request */
- if (first + loop >= last)
- call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
- msg, to - offset);
- if (ret < 0)
- break;
- } while (++loop < count);
- first += count;
-
- for (loop = 0; loop < count; loop++)
- put_page(pages[loop]);
+ afs_load_bvec(call, msg, bv, first, last, offset);
+ offset = 0;
+ bytes = msg->msg_iter.count;
+ nr = msg->msg_iter.nr_segs;
+
+ /* Have to change the state *before* sending the last
+ * packet as RxRPC might give us the reply before it
+ * returns from sending the request.
+ */
+ if (first + nr - 1 >= last)
+ call->state = AFS_CALL_AWAIT_REPLY;
+ ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+ msg, bytes);
+ for (loop = 0; loop < nr; loop++)
+ put_page(bv[loop].bv_page);
if (ret < 0)
break;
+
+ first += nr;
} while (first <= last);
- _leave(" = %d", ret);
return ret;
}
@@ -333,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
+ size_t offset;
+ u32 abort_code;
int ret;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -381,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
msg.msg_controllen = 0;
msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
- /* have to change the state *before* sending the last packet as RxRPC
- * might give us the reply before it returns from sending the
- * request */
+ /* We have to change the state *before* sending the last packet as
+ * rxrpc might give us the reply before it returns from sending the
+ * request. Further, if the send fails, we may already have been given
+ * a notification and may have collected it.
+ */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -405,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
return afs_wait_for_call_to_complete(call);
error_do_abort:
- rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+ call->state = AFS_CALL_COMPLETE;
+ if (ret != -ECONNABORTED) {
+ rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+ -ret, "KSD");
+ } else {
+ abort_code = 0;
+ offset = 0;
+ rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+ false, &abort_code);
+ ret = call->type->abort_to_error(abort_code);
+ }
error_kill_call:
afs_put_call(call);
_leave(" = %d", ret);
@@ -452,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call)
case -EINPROGRESS:
case -EAGAIN:
goto out;
+ case -ECONNABORTED:
+ goto call_complete;
case -ENOTCONN:
abort_code = RX_CALL_DEAD;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, -ret, "KNC");
- goto do_abort;
+ goto save_error;
case -ENOTSUPP:
- abort_code = RX_INVALID_OPERATION;
+ abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, -ret, "KIV");
- goto do_abort;
+ goto save_error;
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
@@ -471,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, EBADMSG, "KUM");
- goto do_abort;
+ goto save_error;
}
}
@@ -482,8 +505,9 @@ out:
_leave("");
return;
-do_abort:
+save_error:
call->error = ret;
+call_complete:
call->state = AFS_CALL_COMPLETE;
goto done;
}
@@ -493,7 +517,6 @@ do_abort:
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
- const char *abort_why;
int ret;
DECLARE_WAITQUEUE(myself, current);
@@ -512,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
continue;
}
- abort_why = "KWC";
- ret = call->error;
- if (call->state == AFS_CALL_COMPLETE)
- break;
- abort_why = "KWI";
- ret = -EINTR;
- if (signal_pending(current))
+ if (call->state == AFS_CALL_COMPLETE ||
+ signal_pending(current))
break;
schedule();
}
@@ -526,13 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
- /* kill the call */
+ /* Kill off the call if it's still live. */
if (call->state < AFS_CALL_COMPLETE) {
- _debug("call incomplete");
+ _debug("call interrupted");
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_CALL_DEAD, -ret, abort_why);
+ RX_USER_ABORT, -EINTR, "KWI");
}
+ ret = call->error;
_debug("call complete");
afs_put_call(call);
_leave(" = %d", ret);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d010422dc89..ecb86a670180 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
- rcu_assign_pointer(vnode->permits, NULL);
+ RCU_INIT_POINTER(vnode->permits, NULL);
mutex_unlock(&vnode->permits_lock);
if (permits)
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
} else {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
+ if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+ goto permission_denied;
if (mask & (MAY_EXEC | MAY_READ)) {
if (!(access & AFS_ACE_READ))
goto permission_denied;
+ if (!(inode->i_mode & S_IRUSR))
+ goto permission_denied;
} else if (mask & MAY_WRITE) {
if (!(access & AFS_ACE_WRITE))
goto permission_denied;
+ if (!(inode->i_mode & S_IWUSR))
+ goto permission_denied;
}
}
key_put(key);
- ret = generic_permission(inode, mask);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d4066ab7dd55..c001b1f2455f 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server)
spin_lock(&afs_server_graveyard_lock);
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &afs_server_graveyard);
- server->time_of_death = get_seconds();
+ server->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_server_reaper,
afs_server_timeout * HZ);
}
@@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work)
LIST_HEAD(corpses);
struct afs_server *server;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_server_graveyard_lock);
while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index d7d8dd8c0b31..37b7c3b342a6 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
struct afs_vlocation *xvl;
/* wait at least 10 minutes before updating... */
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
if (atomic_read(&vl->usage) == 0) {
_debug("buried");
list_move_tail(&vl->grave, &afs_vlocation_graveyard);
- vl->time_of_death = get_seconds();
+ vl->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_vlocation_reap,
afs_vlocation_timeout * HZ);
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
LIST_HEAD(corpses);
struct afs_vlocation *vl;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_vlocation_graveyard_lock);
while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
{
struct afs_cache_vlocation vldb;
struct afs_vlocation *vl, *xvl;
- time_t now;
+ time64_t now;
long timeout;
int ret;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find a record to update */
spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
/* and then reschedule */
_debug("reschedule");
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c83c1a0e851f..2d2fccd5044b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb)
* partly or wholly fill a page that's under preparation for writing
*/
static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
- loff_t pos, struct page *page)
+ loff_t pos, unsigned int len, struct page *page)
{
struct afs_read *req;
- loff_t i_size;
int ret;
_enter(",,%llu", (unsigned long long)pos);
@@ -99,14 +98,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
atomic_set(&req->usage, 1);
req->pos = pos;
+ req->len = len;
req->nr_pages = 1;
req->pages[0] = page;
-
- i_size = i_size_read(&vnode->vfs_inode);
- if (pos + PAGE_SIZE > i_size)
- req->len = i_size - pos;
- else
- req->len = PAGE_SIZE;
+ get_page(page);
ret = afs_vnode_fetch_data(vnode, key, req);
afs_put_read(req);
@@ -159,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
kfree(candidate);
return -ENOMEM;
}
- *pagep = page;
- /* page won't leak in error case: it eventually gets cleaned off LRU */
if (!PageUptodate(page) && len != PAGE_SIZE) {
- ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
+ ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
kfree(candidate);
_leave(" = %d [prep]", ret);
return ret;
@@ -172,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
+ /* page won't leak in error case: it eventually gets cleaned off LRU */
+ *pagep = page;
+
try_again:
spin_lock(&vnode->writeback_lock);
@@ -233,7 +231,7 @@ flush_conflicting_wb:
if (wb->state == AFS_WBACK_PENDING)
wb->state = AFS_WBACK_CONFLICTING;
spin_unlock(&vnode->writeback_lock);
- if (PageDirty(page)) {
+ if (clear_page_dirty_for_io(page)) {
ret = afs_write_back_from_locked_page(wb, page);
if (ret < 0) {
afs_put_writeback(candidate);
@@ -257,7 +255,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ struct key *key = file->private_data;
loff_t i_size, maybe_i_size;
+ int ret;
_enter("{%x:%u},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -273,6 +273,20 @@ int afs_write_end(struct file *file, struct address_space *mapping,
spin_unlock(&vnode->writeback_lock);
}
+ if (!PageUptodate(page)) {
+ if (copied < len) {
+ /* Try and load any missing data from the server. The
+ * unmarshalling routine will take care of clearing any
+ * bits that are beyond the EOF.
+ */
+ ret = afs_fill_page(vnode, key, pos + copied,
+ len - copied, page);
+ if (ret < 0)
+ return ret;
+ }
+ SetPageUptodate(page);
+ }
+
set_page_dirty(page);
if (PageDirty(page))
_debug("dirtied");
@@ -307,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
- ClearPageUptodate(pv.pages[loop]);
+ struct page *page = pv.pages[loop];
+ ClearPageUptodate(page);
if (error)
- SetPageError(pv.pages[loop]);
- end_page_writeback(pv.pages[loop]);
+ SetPageError(page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ if (page->index >= first)
+ first = page->index + 1;
}
__pagevec_release(&pv);
@@ -335,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
_enter(",%lx", primary_page->index);
count = 1;
- if (!clear_page_dirty_for_io(primary_page))
- BUG();
if (test_set_page_writeback(primary_page))
BUG();
@@ -502,17 +518,17 @@ static int afs_writepages_region(struct address_space *mapping,
*/
lock_page(page);
- if (page->mapping != mapping) {
+ if (page->mapping != mapping || !PageDirty(page)) {
unlock_page(page);
put_page(page);
continue;
}
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
-
- if (PageWriteback(page) || !PageDirty(page)) {
+ if (PageWriteback(page)) {
unlock_page(page);
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+ put_page(page);
continue;
}
@@ -523,6 +539,8 @@ static int afs_writepages_region(struct address_space *mapping,
wb->state = AFS_WBACK_WRITING;
spin_unlock(&wb->vnode->writeback_lock);
+ if (!clear_page_dirty_for_io(page))
+ BUG();
ret = afs_write_back_from_locked_page(wb, page);
unlock_page(page);
put_page(page);
@@ -746,6 +764,20 @@ out:
}
/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+ _enter("");
+
+ if ((file->f_mode & FMODE_WRITE) == 0)
+ return 0;
+
+ return vfs_fsync(file, 0);
+}
+
+/*
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 7d398d300e97..9382db998ec9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
newsock->type = con->sock->type;
newsock->ops = con->sock->ops;
- result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+ result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
if (result < 0)
goto accept_err;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a77df377e2e8..ee2d0a485fc3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks / 8;
+ si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
get_cache:
si->cache_mem = 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4650c9b85de7..8d5c62b07b28 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
- clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e849f83d6114..0a6e115562f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
struct mutex build_lock; /* lock for build free nids */
unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
unsigned char *nat_block_bitmap;
+ unsigned short *free_nid_count; /* free nid count of NAT block */
+ spinlock_t free_nid_lock; /* protect updating of nid count */
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94967171dee8..481aa8dc79f4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
- if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
- clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
-
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+ bool set, bool build, bool locked)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
return;
if (set)
- set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
else
- clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+ if (!locked)
+ spin_lock(&nm_i->free_nid_lock);
+ if (set)
+ nm_i->free_nid_count[nat_ofs]++;
+ else if (!build)
+ nm_i->free_nid_count[nat_ofs]--;
+ if (!locked)
+ spin_unlock(&nm_i->free_nid_lock);
}
static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
int i;
- set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+ if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+ return;
+
+ __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
i = start_nid % NAT_ENTRY_PER_BLOCK;
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR)
freed = add_free_nid(sbi, start_nid, true);
- update_free_nid_bitmap(sbi, start_nid, freed);
+ update_free_nid_bitmap(sbi, start_nid, freed, true, false);
}
}
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++) {
if (!test_bit_le(i, nm_i->nat_block_bitmap))
continue;
+ if (!nm_i->free_nid_count[i])
+ continue;
for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
nid_t nid;
@@ -1907,58 +1919,6 @@ out:
up_read(&nm_i->nat_tree_lock);
}
-static int scan_nat_bits(struct f2fs_sb_info *sbi)
-{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct page *page;
- unsigned int i = 0;
- nid_t nid;
-
- if (!enabled_nat_bits(sbi, NULL))
- return -EAGAIN;
-
- down_read(&nm_i->nat_tree_lock);
-check_empty:
- i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
- if (i >= nm_i->nat_blocks) {
- i = 0;
- goto check_partial;
- }
-
- for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
- nid++) {
- if (unlikely(nid >= nm_i->max_nid))
- break;
- add_free_nid(sbi, nid, true);
- }
-
- if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
- goto out;
- i++;
- goto check_empty;
-
-check_partial:
- i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
- if (i >= nm_i->nat_blocks) {
- disable_nat_bits(sbi, true);
- up_read(&nm_i->nat_tree_lock);
- return -EINVAL;
- }
-
- nid = i * NAT_ENTRY_PER_BLOCK;
- page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
- f2fs_put_page(page, 1);
-
- if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
- i++;
- goto check_partial;
- }
-out:
- up_read(&nm_i->nat_tree_lock);
- return 0;
-}
-
static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
if (nm_i->nid_cnt[FREE_NID_LIST])
return;
-
- /* try to find free nids with nat_bits */
- if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
- return;
- }
-
- /* find next valid candidate */
- if (enabled_nat_bits(sbi, NULL)) {
- int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
- nm_i->nat_blocks, 0);
-
- if (idx >= nm_i->nat_blocks)
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- else
- nid = idx * NAT_ENTRY_PER_BLOCK;
}
/* readahead nat pages to be scanned */
@@ -2081,7 +2026,7 @@ retry:
__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
nm_i->available_nids--;
- update_free_nid_bitmap(sbi, *nid, false);
+ update_free_nid_bitmap(sbi, *nid, false, false, false);
spin_unlock(&nm_i->nid_list_lock);
return true;
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
nm_i->available_nids++;
- update_free_nid_bitmap(sbi, nid, true);
+ update_free_nid_bitmap(sbi, nid, true, false, false);
spin_unlock(&nm_i->nid_list_lock);
@@ -2383,7 +2328,7 @@ add_out:
list_add_tail(&nes->set_list, head);
}
-void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
struct page *page)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
valid++;
}
if (valid == 0) {
- set_bit_le(nat_index, nm_i->empty_nat_bits);
- clear_bit_le(nat_index, nm_i->full_nat_bits);
+ __set_bit_le(nat_index, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
return;
}
- clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_index, nm_i->empty_nat_bits);
if (valid == NAT_ENTRY_PER_BLOCK)
- set_bit_le(nat_index, nm_i->full_nat_bits);
+ __set_bit_le(nat_index, nm_i->full_nat_bits);
else
- clear_bit_le(nat_index, nm_i->full_nat_bits);
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
}
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
add_free_nid(sbi, nid, false);
spin_lock(&NM_I(sbi)->nid_list_lock);
NM_I(sbi)->available_nids++;
- update_free_nid_bitmap(sbi, nid, true);
+ update_free_nid_bitmap(sbi, nid, true, false, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
} else {
spin_lock(&NM_I(sbi)->nid_list_lock);
- update_free_nid_bitmap(sbi, nid, false);
+ update_free_nid_bitmap(sbi, nid, false, false, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
return 0;
}
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int i = 0;
+ nid_t nid, last_nid;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return;
+
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks)
+ break;
+
+ __set_bit_le(i, nm_i->nat_block_bitmap);
+
+ nid = i * NAT_ENTRY_PER_BLOCK;
+ last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+ spin_lock(&nm_i->free_nid_lock);
+ for (; nid < last_nid; nid++)
+ update_free_nid_bitmap(sbi, nid, true, true, true);
+ spin_unlock(&nm_i->free_nid_lock);
+ }
+
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks)
+ break;
+
+ __set_bit_le(i, nm_i->nat_block_bitmap);
+ }
+}
+
static int init_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
return 0;
}
-int init_free_nid_cache(struct f2fs_sb_info *sbi)
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi)
GFP_KERNEL);
if (!nm_i->nat_block_bitmap)
return -ENOMEM;
+
+ nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+ sizeof(unsigned short), GFP_KERNEL);
+ if (!nm_i->free_nid_count)
+ return -ENOMEM;
+
+ spin_lock_init(&nm_i->free_nid_lock);
+
return 0;
}
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
+ /* load free nid status from nat_bits table */
+ load_free_nid_bitmap(sbi);
+
build_free_nids(sbi, true, true);
return 0;
}
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
kvfree(nm_i->nat_block_bitmap);
kvfree(nm_i->free_nid_bitmap);
+ kvfree(nm_i->free_nid_count);
kfree(nm_i->nat_bitmap);
kfree(nm_i->nat_bits);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4bd7a8b19332..29ef7088c558 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
if (f2fs_discard_en(sbi) &&
!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
+
+ /* don't overwrite by SSR to keep node chain */
+ if (se->type == CURSEG_WARM_NODE) {
+ if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
+ se->ckpt_valid_blocks++;
+ }
} else {
if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
#ifdef CONFIG_F2FS_CHECK_FS
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef600591d96f..63ee2940775c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
spin_unlock_bh(&wb->work_lock);
}
+static void finish_writeback_work(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
+{
+ struct wb_completion *done = work->done;
+
+ if (work->auto_free)
+ kfree(work);
+ if (done && atomic_dec_and_test(&done->cnt))
+ wake_up_all(&wb->bdi->wb_waitq);
+}
+
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
trace_writeback_queue(wb, work);
- spin_lock_bh(&wb->work_lock);
- if (!test_bit(WB_registered, &wb->state))
- goto out_unlock;
if (work->done)
atomic_inc(&work->done->cnt);
- list_add_tail(&work->list, &wb->work_list);
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+ spin_lock_bh(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ } else
+ finish_writeback_work(wb, work);
+
spin_unlock_bh(&wb->work_lock);
}
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
set_bit(WB_writeback_running, &wb->state);
while ((work = get_next_work_item(wb)) != NULL) {
- struct wb_completion *done = work->done;
-
trace_writeback_exec(wb, work);
-
wrote += wb_writeback(wb, work);
-
- if (work->auto_free)
- kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ finish_writeback_work(wb, work);
}
/*
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c45084ac642d..511e1ed7e2de 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
struct gfs2_sbd *ln_sbd;
u64 ln_number;
unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index bb79972dc638..773774531aff 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -232,12 +232,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = {
.svo_module = THIS_MODULE,
};
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = &nfs41_cb_sv_ops,
};
#else
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = NULL,
};
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 91a8d610ba0f..390ada8741bc 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
return NULL;
}
-static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+/*
+ * Return true if @clp is done initializing, false if still working on it.
+ *
+ * Use nfs_client_init_status to check if it was successful.
+ */
+bool nfs_client_init_is_complete(const struct nfs_client *clp)
{
return clp->cl_cons_state <= NFS_CS_READY;
}
+EXPORT_SYMBOL_GPL(nfs_client_init_is_complete);
+
+/*
+ * Return 0 if @clp was successfully initialized, -errno otherwise.
+ *
+ * This must be called *after* nfs_client_init_is_complete() returns true,
+ * otherwise it will pop WARN_ON_ONCE and return -EINVAL
+ */
+int nfs_client_init_status(const struct nfs_client *clp)
+{
+ /* called without checking nfs_client_init_is_complete */
+ if (clp->cl_cons_state > NFS_CS_READY) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+ return clp->cl_cons_state;
+}
+EXPORT_SYMBOL_GPL(nfs_client_init_status);
int nfs_wait_client_init_complete(const struct nfs_client *clp)
{
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index f956ca20a8a3..d913e818858f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs4_pnfs_ds *ret = ds;
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+ int status;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
@@ -277,9 +278,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
if (ds->ds_clp)
goto out_test_devid;
- nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+ status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
dataserver_retrans, 4,
s->nfs_client->cl_minorversion);
+ if (status) {
+ nfs4_mark_deviceid_unavailable(devid);
+ ret = NULL;
+ goto out;
+ }
out_test_devid:
if (ret->ds_clp == NULL ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f4f39b0ab09b..98b34c9b0564 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
static inline bool
ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
- return nfs4_test_deviceid_unavailable(node);
+ /*
+ * Flexfiles should never mark a DS unavailable, but if it does
+ * print a (ratelimited) warning as this can affect performance.
+ */
+ if (nfs4_test_deviceid_unavailable(node)) {
+ u32 *p = (u32 *)node->deviceid.data;
+
+ pr_warn_ratelimited("NFS: flexfiles layout referencing an "
+ "unavailable device [%x%x%x%x]\n",
+ p[0], p[1], p[2], p[3]);
+ return true;
+ }
+ return false;
}
static inline int
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e5a6f248697b..85fde93dff77 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -384,6 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
+ int status;
if (!ff_layout_mirror_valid(lseg, mirror, true)) {
pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -404,7 +405,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
*/
- nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+ status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
dataserver_retrans,
mirror->mirror_ds->ds_versions[0].version,
mirror->mirror_ds->ds_versions[0].minor_version);
@@ -420,11 +421,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror->mirror_ds->ds_versions[0].wsize = max_payload;
goto out;
}
+out_fail:
ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
-out_fail:
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
ds = NULL;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 09ca5095c04e..7b38fedb7e03 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
struct nfs_fattr *,
rpc_authflavor_t);
+extern bool nfs_client_init_is_complete(const struct nfs_client *clp);
+extern int nfs_client_init_status(const struct nfs_client *clp);
extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 5ae9d64ea08b..8346ccbf2d52 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
- if (server->rsize > server_resp_sz)
+ if (!server->rsize || server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
- if (server->wsize > server_rqst_sz)
+ if (!server->wsize || server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b183686c6d4..c780d98035cc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
return 0;
- /* even though OPEN succeeded, access is denied. Close the file */
- nfs4_close_state(state, fmode);
return -EACCES;
}
@@ -7427,11 +7425,11 @@ static void nfs4_exchange_id_release(void *data)
struct nfs41_exchange_id_data *cdata =
(struct nfs41_exchange_id_data *)data;
- nfs_put_client(cdata->args.client);
if (cdata->xprt) {
xprt_put(cdata->xprt);
rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
}
+ nfs_put_client(cdata->args.client);
kfree(cdata->res.impl_id);
kfree(cdata->res.server_scope);
kfree(cdata->res.server_owner);
@@ -7538,10 +7536,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
task_setup_data.callback_data = calldata;
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- status = PTR_ERR(task);
- goto out_impl_id;
- }
+ if (IS_ERR(task))
+ return PTR_ERR(task);
if (!xprt) {
status = rpc_wait_for_completion_task(task);
@@ -7569,6 +7565,7 @@ out_server_owner:
kfree(calldata->res.server_owner);
out_calldata:
kfree(calldata);
+ nfs_put_client(clp);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f0369e362753..80ce289eea05 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
if (len <= 0)
goto out;
dprintk("%s: name=%s\n", __func__, group_name->data);
- return NFS_ATTR_FATTR_OWNER_NAME;
+ return NFS_ATTR_FATTR_GROUP_NAME;
} else {
len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
XDR_MAX_NETOBJ);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 63f77b49a586..590e1e35781f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
struct nfs4_deviceid_node *devid, unsigned int timeo,
unsigned int retrans, u32 version, u32 minor_version);
struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 9414b492439f..7250b95549ec 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -745,15 +745,17 @@ out:
/*
* Create an rpc connection to the nfs4_pnfs_ds data server.
* Currently only supports IPv4 and IPv6 addresses.
- * If connection fails, make devid unavailable.
+ * If connection fails, make devid unavailable and return a -errno.
*/
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
struct nfs4_deviceid_node *devid, unsigned int timeo,
unsigned int retrans, u32 version, u32 minor_version)
{
- if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
- int err = 0;
+ int err;
+again:
+ err = 0;
+ if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
if (version == 3) {
err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
retrans);
@@ -766,12 +768,29 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
err = -EPROTONOSUPPORT;
}
- if (err)
- nfs4_mark_deviceid_unavailable(devid);
nfs4_clear_ds_conn_bit(ds);
} else {
nfs4_wait_ds_connect(ds);
+
+ /* what was waited on didn't connect AND didn't mark unavail */
+ if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
+ goto again;
}
+
+ /*
+ * At this point the ds->ds_clp should be ready, but it might have
+ * hit an error.
+ */
+ if (!err) {
+ if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+ WARN_ON_ONCE(ds->ds_clp ||
+ !nfs4_test_deviceid_unavailable(devid));
+ return -EINVAL;
+ }
+ err = nfs_client_init_status(ds->ds_clp);
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e75b056f46f4..abb2c8a3be42 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
(long long)req_offset(req));
if (status < 0) {
nfs_context_set_write_error(req->wb_context, status);
- nfs_inode_remove_request(req);
+ if (req->wb_page)
+ nfs_inode_remove_request(req);
dprintk_cont(", error = %d\n", status);
goto next;
}
@@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
* returned by the server against all stored verfs. */
if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
/* We have a match */
- nfs_inode_remove_request(req);
+ if (req->wb_page)
+ nfs_inode_remove_request(req);
dprintk_cont(" OK\n");
goto next;
}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4348027384f5..d0ab7e56d0b4 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+ ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
if (ret < 0)
goto out;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547fcf274..eb00bc133bca 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+ int size);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index c6809ff41197..96b45cd6c63f 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
}
#endif /* DEBUG */
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *sfp,
+ int size)
+{
+ struct xfs_dir2_sf_entry *sfep;
+ struct xfs_dir2_sf_entry *next_sfep;
+ char *endp;
+ const struct xfs_dir_ops *dops;
+ xfs_ino_t ino;
+ int i;
+ int i8count;
+ int offset;
+ __uint8_t filetype;
+
+ dops = xfs_dir_get_ops(mp, NULL);
+
+ /*
+ * Give up if the directory is way too short.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp, size >
+ offsetof(struct xfs_dir2_sf_hdr, parent));
+ XFS_WANT_CORRUPTED_RETURN(mp, size >=
+ xfs_dir2_sf_hdr_size(sfp->i8count));
+
+ endp = (char *)sfp + size;
+
+ /* Check .. entry */
+ ino = dops->sf_get_parent_ino(sfp);
+ i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+ offset = dops->data_first_offset;
+
+ /* Check all reported entries */
+ sfep = xfs_dir2_sf_firstentry(sfp);
+ for (i = 0; i < sfp->count; i++) {
+ /*
+ * struct xfs_dir2_sf_entry has a variable length.
+ * Check the fixed-offset parts of the structure are
+ * within the data buffer.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ ((char *)sfep + sizeof(*sfep)) < endp);
+
+ /* Don't allow names with known bad length. */
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+ /*
+ * Check that the variable-length part of the structure is
+ * within the data buffer. The next entry starts after the
+ * name component, so nextentry is an acceptable test.
+ */
+ next_sfep = dops->sf_nextentry(sfp, sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+ /* Check that the offsets always increase. */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ xfs_dir2_sf_get_offset(sfep) >= offset);
+
+ /* Check the inode number. */
+ ino = dops->sf_get_ino(sfp, sfep);
+ i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+ /* Check the file type. */
+ filetype = dops->sf_get_ftype(sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+ offset = xfs_dir2_sf_get_offset(sfep) +
+ dops->data_entsize(sfep->namelen);
+
+ sfep = next_sfep;
+ }
+ XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+ XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+ /* Make sure this whole thing ought to be in local format. */
+ XFS_WANT_CORRUPTED_RETURN(mp, offset +
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+ return 0;
+}
+
/*
* Create a new (shortform) directory.
*/
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 25c1e078aef6..9653e964eda4 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -33,6 +33,8 @@
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_ifork_zone;
@@ -320,6 +322,7 @@ xfs_iformat_local(
int whichfork,
int size)
{
+ int error;
/*
* If the size is unreasonable, then something
@@ -336,6 +339,14 @@ xfs_iformat_local(
return -EFSCORRUPTED;
}
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(ip->i_mount,
+ (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+ size);
+ if (error)
+ return error;
+ }
+
xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
return 0;
}
@@ -856,7 +867,7 @@ xfs_iextents_copy(
* In these cases, the format always takes precedence, because the
* format indicates the current state of the fork.
*/
-void
+int
xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
@@ -866,6 +877,7 @@ xfs_iflush_fork(
char *cp;
xfs_ifork_t *ifp;
xfs_mount_t *mp;
+ int error;
static const short brootflag[2] =
{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
static const short dataflag[2] =
@@ -874,7 +886,7 @@ xfs_iflush_fork(
{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
if (!iip)
- return;
+ return 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
/*
* This can happen if we gave up in iformat in an error path,
@@ -882,12 +894,19 @@ xfs_iflush_fork(
*/
if (!ifp) {
ASSERT(whichfork == XFS_ATTR_FORK);
- return;
+ return 0;
}
cp = XFS_DFORK_PTR(dip, whichfork);
mp = ip->i_mount;
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(mp,
+ (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+ ifp->if_bytes);
+ if (error)
+ return error;
+ }
if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(ifp->if_u1.if_data != NULL);
@@ -940,6 +959,7 @@ xfs_iflush_fork(
ASSERT(0);
break;
}
+ return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7fb8365326d1..132dc59fdde6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
void xfs_idata_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b83bd8..ad9396e516f6 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
struct xfs_da_geometry *geo = args->geo;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Give up if the directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
-
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
- return -EFSCORRUPTED;
-
/*
* If the block number in the offset is out of range, we're done.
*/
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7eaf1ef74e3c..c7fe2c2123ab 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3475,6 +3475,7 @@ xfs_iflush_int(
struct xfs_inode_log_item *iip = ip->i_itemp;
struct xfs_dinode *dip;
struct xfs_mount *mp = ip->i_mount;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
@@ -3557,9 +3558,14 @@ xfs_iflush_int(
if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
ip->i_d.di_flushiter = 0;
- xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
- if (XFS_IFORK_Q(ip))
- xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ if (XFS_IFORK_Q(ip)) {
+ error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ }
xfs_inobp_check(mp, bp);
/*
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index a2bfd7843f18..e2b9c6fe2714 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);
int af_alg_release(struct socket *sock);
void af_alg_release_parent(struct sock *sk);
-int af_alg_accept(struct sock *sk, struct socket *newsock);
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
void af_alg_free_sg(struct af_alg_sgl *sgl);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 673acda012af..9b05886f9773 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -287,18 +287,15 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
}
/* Validate the processor object's proc_id */
-bool acpi_processor_validate_proc_id(int proc_id);
+bool acpi_duplicate_processor_id(int proc_id);
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/* Arch dependent functions for cpu hotplug support */
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
int *pcpu);
int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-void acpi_set_processor_mapping(void);
-
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
#endif
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 61d042bbbf60..68449293c4b6 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -163,6 +163,7 @@ struct dccp_request_sock {
__u64 dreq_isr;
__u64 dreq_gsr;
__be32 dreq_service;
+ spinlock_t dreq_lock;
struct list_head dreq_featneg;
__u32 dreq_timestamp_echo;
__u32 dreq_timestamp_time;
diff --git a/include/linux/device.h b/include/linux/device.h
index 30c4570e928d..9ef518af5515 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev)
extern void lock_device_hotplug(void);
extern void unlock_device_hotplug(void);
extern int lock_device_hotplug_sysfs(void);
-void assert_held_device_hotplug(void);
extern int device_offline(struct device *dev);
extern int device_online(struct device *dev);
extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0c167fdee5f7..fbf7b39e8103 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -409,6 +409,7 @@ struct bpf_prog {
u16 pages; /* Number of allocated pages */
kmemcheck_bitfield_begin(meta);
u16 jited:1, /* Is our filter JIT'ed? */
+ locked:1, /* Program image locked? */
gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */
dst_needed:1, /* Do we need dst entry? */
@@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
{
- set_memory_ro((unsigned long)fp, fp->pages);
+ fp->locked = 1;
+ WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
}
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
{
- set_memory_rw((unsigned long)fp, fp->pages);
+ if (fp->locked) {
+ WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
+ /* In case set_memory_rw() fails, we want to be the first
+ * to crash here instead of some random place later on.
+ */
+ fp->locked = 0;
+ }
}
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{
- set_memory_ro((unsigned long)hdr, hdr->pages);
+ WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
}
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
{
- set_memory_rw((unsigned long)hdr, hdr->pages);
+ WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
}
#else
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2484b2fcc6eb..933d93656605 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
struct fwnode_handle *child,
enum gpiod_flags flags,
const char *label);
-/* FIXME: delete this helper when users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
- const char *con_id, struct fwnode_handle *child)
-{
- return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
- 0, child,
- GPIOD_ASIS,
- "?");
-}
#else /* CONFIG_GPIOLIB */
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
return ERR_PTR(-ENOSYS);
}
-/* FIXME: delete this when all users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
- const char *con_id, struct fwnode_handle *child)
-{
- return ERR_PTR(-ENOSYS);
-}
-
#endif /* CONFIG_GPIOLIB */
static inline
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 672cfef72fc8..97cbca19430d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -373,6 +373,8 @@
#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
#define ICC_IGRPEN1_EL1_SHIFT 0
#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB (1U << 2)
+#define ICC_SRE_EL1_DFB (1U << 1)
#define ICC_SRE_EL1_SRE (1U << 0)
/*
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 188eced6813e..9f3616085423 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode(
{
return NULL;
}
+static inline bool irq_domain_check_msi_remap(void)
+{
+ return false;
+}
#endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 1c823bef4c15..5734480c9590 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -6,6 +6,7 @@
struct kmem_cache;
struct page;
struct vm_struct;
+struct task_struct;
#ifdef CONFIG_KASAN
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c14ad9809da..397b7b5b1933 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev);
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev);
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
gpa_t addr);
@@ -403,7 +403,7 @@ struct kvm {
struct kvm_vm_stat stat;
struct kvm_arch arch;
refcount_t users_count;
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
spinlock_t ring_lock;
struct list_head coalesced_zones;
@@ -502,10 +502,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
-static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
}
static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
@@ -877,22 +877,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
-void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
-#else
-static inline int kvm_iommu_map_pages(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
- return 0;
-}
-
-static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
-#endif
-
/*
* search_memslots() and __gfn_to_memslot() are here because they are
* used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index b01fe1009084..87ff4f58a2f0 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+ })
/**
* ptr_is_a_nulls - Test if a ptr is a nulls
* @ptr: ptr to be tested
diff --git a/include/linux/net.h b/include/linux/net.h
index cd0c8bd0a1de..0620f5e18c96 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
int (*socketpair)(struct socket *sock1,
struct socket *sock2);
int (*accept) (struct socket *sock,
- struct socket *newsock, int flags);
+ struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int *sockaddr_len, int peer);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 772476028a65..43a774873aa9 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev);
int genphy_suspend(struct phy_device *phydev);
int genphy_resume(struct phy_device *phydev);
int genphy_soft_reset(struct phy_device *phydev);
+static inline int genphy_no_soft_reset(struct phy_device *phydev)
+{
+ return 0;
+}
void phy_driver_unregister(struct phy_driver *drv);
void phy_drivers_unregister(struct phy_driver *drv, int n);
int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h
new file mode 100644
index 000000000000..d60d4e278609
--- /dev/null
+++ b/include/linux/purgatory.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ * lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
diff --git a/include/linux/random.h b/include/linux/random.h
index 7bd2403e4fef..ed5c3838780d 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -37,14 +37,26 @@ extern void get_random_bytes(void *buf, int nbytes);
extern int add_random_ready_callback(struct random_ready_callback *rdy);
extern void del_random_ready_callback(struct random_ready_callback *rdy);
extern void get_random_bytes_arch(void *buf, int nbytes);
-extern int random_int_secret_init(void);
#ifndef MODULE
extern const struct file_operations random_fops, urandom_fops;
#endif
-unsigned int get_random_int(void);
-unsigned long get_random_long(void);
+u32 get_random_u32(void);
+u64 get_random_u64(void);
+static inline unsigned int get_random_int(void)
+{
+ return get_random_u32();
+}
+static inline unsigned long get_random_long(void)
+{
+#if BITS_PER_LONG == 64
+ return get_random_u64();
+#else
+ return get_random_u32();
+#endif
+}
+
unsigned long randomize_page(unsigned long start, unsigned long range);
u32 prandom_u32(void);
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 4ae95f7e8597..a23a33153180 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
+/**
+ * hlist_nulls_for_each_entry_safe -
+ * iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \
+ for (({barrier();}), \
+ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
+ (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
+ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index b7952d55b9c0..f39ae697347f 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags, int is_sendmsg);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
-int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 826f198374f8..c7a577976bec 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
return (unsigned long)min_t(u64, when, max_when);
}
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a244db5e5ff7..07a0b128625a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -476,7 +476,8 @@ struct sctp_pf {
int (*send_verify) (struct sctp_sock *, union sctp_addr *);
int (*supported_addrs)(const struct sctp_sock *, __be16 *);
struct sock *(*create_accept_sk) (struct sock *sk,
- struct sctp_association *asoc);
+ struct sctp_association *asoc,
+ bool kern);
int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 5e5997654db6..03252d53975d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -236,6 +236,7 @@ struct sock_common {
* @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
* @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
* @sk_lock: synchronizer
+ * @sk_kern_sock: True if sock is using kernel lock classes
* @sk_rcvbuf: size of receive buffer in bytes
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
@@ -430,7 +431,8 @@ struct sock {
#endif
kmemcheck_bitfield_begin(flags);
- unsigned int sk_padding : 2,
+ unsigned int sk_padding : 1,
+ sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
sk_userlocks : 4,
@@ -1015,7 +1017,8 @@ struct proto {
int addr_len);
int (*disconnect)(struct sock *sk, int flags);
- struct sock * (*accept)(struct sock *sk, int flags, int *err);
+ struct sock * (*accept)(struct sock *sk, int flags, int *err,
+ bool kern);
int (*ioctl)(struct sock *sk, int cmd,
unsigned long arg);
@@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int);
+int sock_no_accept(struct socket *, struct socket *, int, bool);
int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
unsigned int sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index b0e275de6dec..583875ea136a 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -196,6 +196,7 @@ struct iscsi_conn {
struct iscsi_task *task; /* xmit task in progress */
/* xmit */
+ spinlock_t taskqueuelock; /* protects the next three lists */
struct list_head mgmtqueue; /* mgmt (control) xmit queue */
struct list_head cmdqueue; /* data-path cmd queue */
struct list_head requeue; /* tasks needing another run */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6f22b39f1b0c..080c7ce9bae8 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev)
sdev->sdev_state == SDEV_CREATED_BLOCK;
}
+int scsi_internal_device_block(struct scsi_device *sdev, bool wait);
+int scsi_internal_device_unblock(struct scsi_device *sdev,
+ enum scsi_device_state new_state);
+
/* accessor functions for the SCSI parameters */
static inline int scsi_device_sync(struct scsi_device *sdev)
{
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index b54b98dc2d4a..1b0f447ce850 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -4,7 +4,12 @@
#include <linux/types.h>
#include <target/target_core_base.h>
-#define TRANSPORT_FLAG_PASSTHROUGH 1
+#define TRANSPORT_FLAG_PASSTHROUGH 0x1
+/*
+ * ALUA commands, state checks and setup operations are handled by the
+ * backend module.
+ */
+#define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2
struct request_queue;
struct scatterlist;
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 37c274e61acc..4b784b6e21c0 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp {
struct list_head tg_pt_gp_lun_list;
struct se_lun *tg_pt_gp_alua_lun;
struct se_node_acl *tg_pt_gp_alua_nacl;
- struct delayed_work tg_pt_gp_transition_work;
+ struct work_struct tg_pt_gp_transition_work;
struct completion *tg_pt_gp_transition_complete;
};
diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h
index 407cb55df6ac..7fb97863c945 100644
--- a/include/uapi/drm/omap_drm.h
+++ b/include/uapi/drm/omap_drm.h
@@ -33,8 +33,8 @@ extern "C" {
#define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */
struct drm_omap_param {
- uint64_t param; /* in */
- uint64_t value; /* in (set_param), out (get_param) */
+ __u64 param; /* in */
+ __u64 value; /* in (set_param), out (get_param) */
};
#define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */
@@ -53,18 +53,18 @@ struct drm_omap_param {
#define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32)
union omap_gem_size {
- uint32_t bytes; /* (for non-tiled formats) */
+ __u32 bytes; /* (for non-tiled formats) */
struct {
- uint16_t width;
- uint16_t height;
+ __u16 width;
+ __u16 height;
} tiled; /* (for tiled formats) */
};
struct drm_omap_gem_new {
union omap_gem_size size; /* in */
- uint32_t flags; /* in */
- uint32_t handle; /* out */
- uint32_t __pad;
+ __u32 flags; /* in */
+ __u32 handle; /* out */
+ __u32 __pad;
};
/* mask of operations: */
@@ -74,33 +74,33 @@ enum omap_gem_op {
};
struct drm_omap_gem_cpu_prep {
- uint32_t handle; /* buffer handle (in) */
- uint32_t op; /* mask of omap_gem_op (in) */
+ __u32 handle; /* buffer handle (in) */
+ __u32 op; /* mask of omap_gem_op (in) */
};
struct drm_omap_gem_cpu_fini {
- uint32_t handle; /* buffer handle (in) */
- uint32_t op; /* mask of omap_gem_op (in) */
+ __u32 handle; /* buffer handle (in) */
+ __u32 op; /* mask of omap_gem_op (in) */
/* TODO maybe here we pass down info about what regions are touched
* by sw so we can be clever about cache ops? For now a placeholder,
* set to zero and we just do full buffer flush..
*/
- uint32_t nregions;
- uint32_t __pad;
+ __u32 nregions;
+ __u32 __pad;
};
struct drm_omap_gem_info {
- uint32_t handle; /* buffer handle (in) */
- uint32_t pad;
- uint64_t offset; /* mmap offset (out) */
+ __u32 handle; /* buffer handle (in) */
+ __u32 pad;
+ __u64 offset; /* mmap offset (out) */
/* note: in case of tiled buffers, the user virtual size can be
* different from the physical size (ie. how many pages are needed
* to back the object) which is returned in DRM_IOCTL_GEM_OPEN..
* This size here is the one that should be used if you want to
* mmap() the buffer:
*/
- uint32_t size; /* virtual size for mmap'ing (out) */
- uint32_t __pad;
+ __u32 size; /* virtual size for mmap'ing (out) */
+ __u32 __pad;
};
#define DRM_OMAP_GET_PARAM 0x00
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee077a596..8c6d3bdb9a00 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,7 @@ typedef struct elf64_shdr {
#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
#define NT_ARM_TLS 0x401 /* ARM TLS register */
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f51d5082a377..6180ea50e9ef 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt {
#define KVM_VM_PPC_HV 1
#define KVM_VM_PPC_PR 2
+/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
+#define KVM_VM_MIPS_TE 0
+#define KVM_VM_MIPS_VZ 1
+
#define KVM_S390_SIE_PAGE_OFFSET 1
/*
@@ -883,6 +887,11 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_MMU_RADIX 134
#define KVM_CAP_PPC_MMU_HASH_V3 135
#define KVM_CAP_IMMEDIATE_EXIT 136
+#define KVM_CAP_MIPS_VZ 137
+#define KVM_CAP_MIPS_TE 138
+#define KVM_CAP_MIPS_64BIT 139
+#define KVM_CAP_S390_GS 140
+#define KVM_CAP_S390_AIS 141
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
index d08c63f3dd6f..0c5d5dd61b6a 100644
--- a/include/uapi/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
@@ -64,7 +64,7 @@ struct packet_diag_mclist {
__u32 pdmc_count;
__u16 pdmc_type;
__u16 pdmc_alen;
- __u8 pdmc_addr[MAX_ADDR_LEN];
+ __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */
};
struct packet_diag_ring {
diff --git a/init/main.c b/init/main.c
index eae2f15657c6..f9c9d9948203 100644
--- a/init/main.c
+++ b/init/main.c
@@ -882,7 +882,6 @@ static void __init do_basic_setup(void)
do_ctors();
usermodehelper_enable();
do_initcalls();
- random_int_secret_init();
}
static void __init do_pre_smp_initcalls(void)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3ea87fb19a94..afe5bab376c9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,11 +13,12 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
struct bucket {
- struct hlist_head head;
+ struct hlist_nulls_head head;
raw_spinlock_t lock;
};
@@ -44,9 +45,14 @@ enum extra_elem_state {
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
- struct hlist_node hash_node;
- struct bpf_htab *htab;
- struct pcpu_freelist_node fnode;
+ struct hlist_nulls_node hash_node;
+ struct {
+ void *padding;
+ union {
+ struct bpf_htab *htab;
+ struct pcpu_freelist_node fnode;
+ };
+ };
};
union {
struct rcu_head rcu;
@@ -162,7 +168,8 @@ skip_percpu_elems:
offsetof(struct htab_elem, lru_node),
htab->elem_size, htab->map.max_entries);
else
- pcpu_freelist_populate(&htab->freelist, htab->elems,
+ pcpu_freelist_populate(&htab->freelist,
+ htab->elems + offsetof(struct htab_elem, fnode),
htab->elem_size, htab->map.max_entries);
return 0;
@@ -217,6 +224,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
int err, i;
u64 cost;
+ BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+ BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+
if (lru && !capable(CAP_SYS_ADMIN))
/* LRU implementation is much complicated than other
* maps. Hence, limit to CAP_SYS_ADMIN for now.
@@ -326,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
goto free_htab;
for (i = 0; i < htab->n_buckets; i++) {
- INIT_HLIST_HEAD(&htab->buckets[i].head);
+ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
}
@@ -366,20 +378,44 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
return &htab->buckets[hash & (htab->n_buckets - 1)];
}
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
return &__select_bucket(htab, hash)->head;
}
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
void *key, u32 key_size)
{
+ struct hlist_nulls_node *n;
+ struct htab_elem *l;
+
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+ if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ return l;
+
+ return NULL;
+}
+
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+ u32 hash, void *key,
+ u32 key_size, u32 n_buckets)
+{
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_rcu(l, head, hash_node)
+again:
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l->hash == hash && !memcmp(&l->key, key, key_size))
return l;
+ if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+ goto again;
+
return NULL;
}
@@ -387,7 +423,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l;
u32 hash, key_size;
@@ -400,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
head = select_bucket(htab, hash);
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
return l;
}
@@ -433,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
struct bpf_htab *htab = (struct bpf_htab *)arg;
- struct htab_elem *l, *tgt_l;
- struct hlist_head *head;
+ struct htab_elem *l = NULL, *tgt_l;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
unsigned long flags;
struct bucket *b;
@@ -444,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
raw_spin_lock_irqsave(&b->lock, flags);
- hlist_for_each_entry_rcu(l, head, hash_node)
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
break;
}
@@ -459,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l, *next_l;
u32 hash, key_size;
int i;
@@ -473,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
head = select_bucket(htab, hash);
/* lookup the key */
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
if (!l) {
i = 0;
@@ -481,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
}
/* key was found, get next key in the same bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
struct htab_elem, hash_node);
if (next_l) {
@@ -500,7 +537,7 @@ find_first_elem:
head = select_bucket(htab, i);
/* pick first element in the bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
struct htab_elem, hash_node);
if (next_l) {
/* if it's not empty, just return it */
@@ -582,9 +619,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
int err = 0;
if (prealloc) {
- l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
- if (!l_new)
+ struct pcpu_freelist_node *l;
+
+ l = pcpu_freelist_pop(&htab->freelist);
+ if (!l)
err = -E2BIG;
+ else
+ l_new = container_of(l, struct htab_elem, fnode);
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
@@ -661,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -700,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
free_htab_elem(htab, l_old);
}
ret = 0;
@@ -716,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new, *l_old = NULL;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -757,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
bpf_lru_node_set_ref(&l_new->lru_node);
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
}
ret = 0;
@@ -781,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -820,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = PTR_ERR(l_new);
goto err;
}
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
ret = 0;
err:
@@ -834,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -882,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
} else {
pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
}
ret = 0;
@@ -910,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
@@ -930,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
free_htab_elem(htab, l);
ret = 0;
}
@@ -942,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
@@ -962,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
ret = 0;
}
@@ -977,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
int i;
for (i = 0; i < htab->n_buckets; i++) {
- struct hlist_head *head = select_bucket(htab, i);
- struct hlist_node *n;
+ struct hlist_nulls_head *head = select_bucket(htab, i);
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_safe(l, n, head, hash_node) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+ hlist_nulls_del_rcu(&l->hash_node);
if (l->state != HTAB_EXTRA_ELEM_USED)
htab_elem_free(htab, l);
}
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 8bfe0afaee10..b37bd9ab7f57 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -500,9 +500,15 @@ unlock:
raw_spin_unlock(&trie->lock);
}
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ return -ENOTSUPP;
+}
+
static const struct bpf_map_ops trie_ops = {
.map_alloc = trie_alloc,
.map_free = trie_free,
+ .map_get_next_key = trie_get_next_key,
.map_lookup_elem = trie_lookup_elem,
.map_update_elem = trie_update_elem,
.map_delete_elem = trie_delete_elem,
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 56eba9caa632..1dc22f6b49f5 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
struct task_struct *task;
int count = 0;
- seq_printf(seq, "css_set %p\n", cset);
+ seq_printf(seq, "css_set %pK\n", cset);
list_for_each_entry(task, &cset->tasks, cg_list) {
if (count++ > MAX_TASKS_SHOWN_PER_CSS)
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index e756dae49300..2237201d66d5 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task)
/* Only log the first time events_limit is incremented. */
if (atomic64_inc_return(&pids->events_limit) == 1) {
pr_info("cgroup: fork rejected by pids controller in ");
- pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+ pr_cont_cgroup_path(css->cgroup);
pr_cont("\n");
}
cgroup_file_notify(&pids->events_file);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f7c063239fa5..37b223e4fc05 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
struct cpuhp_step *sp;
int ret = 0;
- mutex_lock(&cpuhp_state_mutex);
-
if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
ret = cpuhp_reserve_state(state);
if (ret < 0)
- goto out;
+ return ret;
state = ret;
}
sp = cpuhp_get_step(state);
- if (name && sp->name) {
- ret = -EBUSY;
- goto out;
- }
+ if (name && sp->name)
+ return -EBUSY;
+
sp->startup.single = startup;
sp->teardown.single = teardown;
sp->name = name;
sp->multi_instance = multi_instance;
INIT_HLIST_HEAD(&sp->list);
-out:
- mutex_unlock(&cpuhp_state_mutex);
return ret;
}
@@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
return -EINVAL;
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
goto add_node;
@@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
if (ret) {
if (sp->teardown.multi)
cpuhp_rollback_install(cpu, state, node);
- goto err;
+ goto unlock;
}
}
add_node:
ret = 0;
- mutex_lock(&cpuhp_state_mutex);
hlist_add_head(node, &sp->list);
+unlock:
mutex_unlock(&cpuhp_state_mutex);
-
-err:
put_online_cpus();
return ret;
}
@@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
return -EINVAL;
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance);
@@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
}
}
out:
+ mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
/*
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
@@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
return -EINVAL;
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
+
if (!invoke || !cpuhp_get_teardown_cb(state))
goto remove;
/*
@@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
}
remove:
- mutex_lock(&cpuhp_state_mutex);
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
@@ -1571,6 +1568,7 @@ remove:
return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
+
/**
* __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
* @state: The state to remove
@@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
WARN(!hlist_empty(&sp->list),
"Error: Removing state %d which has instances left.\n",
@@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
}
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
+ mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a17ed56c8ce1..ff01cba86f43 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4256,7 +4256,7 @@ int perf_event_release_kernel(struct perf_event *event)
raw_spin_lock_irq(&ctx->lock);
/*
- * Mark this even as STATE_DEAD, there is no external reference to it
+ * Mark this event as STATE_DEAD, there is no external reference to it
* anymore.
*
* Anybody acquiring event->child_mutex after the below loop _must_
@@ -10417,21 +10417,22 @@ void perf_event_free_task(struct task_struct *task)
continue;
mutex_lock(&ctx->mutex);
-again:
- list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
- group_entry)
- perf_free_event(event, ctx);
+ raw_spin_lock_irq(&ctx->lock);
+ /*
+ * Destroy the task <-> ctx relation and mark the context dead.
+ *
+ * This is important because even though the task hasn't been
+ * exposed yet the context has been (through child_list).
+ */
+ RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
+ WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+ put_task_struct(task); /* cannot be last */
+ raw_spin_unlock_irq(&ctx->lock);
- list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
- group_entry)
+ list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
perf_free_event(event, ctx);
- if (!list_empty(&ctx->pinned_groups) ||
- !list_empty(&ctx->flexible_groups))
- goto again;
-
mutex_unlock(&ctx->mutex);
-
put_ctx(ctx);
}
}
@@ -10469,7 +10470,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
}
/*
- * inherit a event from parent task to child task:
+ * Inherit an event from parent task to child task.
+ *
+ * Returns:
+ * - valid pointer on success
+ * - NULL for orphaned events
+ * - IS_ERR() on error
*/
static struct perf_event *
inherit_event(struct perf_event *parent_event,
@@ -10563,6 +10569,16 @@ inherit_event(struct perf_event *parent_event,
return child_event;
}
+/*
+ * Inherits an event group.
+ *
+ * This will quietly suppress orphaned events; !inherit_event() is not an error.
+ * This matches with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ * - 0 on success
+ * - <0 on error
+ */
static int inherit_group(struct perf_event *parent_event,
struct task_struct *parent,
struct perf_event_context *parent_ctx,
@@ -10577,6 +10593,11 @@ static int inherit_group(struct perf_event *parent_event,
child, NULL, child_ctx);
if (IS_ERR(leader))
return PTR_ERR(leader);
+ /*
+ * @leader can be NULL here because of is_orphaned_event(). In this
+ * case inherit_event() will create individual events, similar to what
+ * perf_group_detach() would do anyway.
+ */
list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
child_ctr = inherit_event(sub, parent, parent_ctx,
child, leader, child_ctx);
@@ -10586,6 +10607,17 @@ static int inherit_group(struct perf_event *parent_event,
return 0;
}
+/*
+ * Creates the child task context and tries to inherit the event-group.
+ *
+ * Clears @inherited_all on !attr.inherited or error. Note that we'll leave
+ * inherited_all set when we 'fail' to inherit an orphaned event; this is
+ * consistent with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ * - 0 on success
+ * - <0 on error
+ */
static int
inherit_task_group(struct perf_event *event, struct task_struct *parent,
struct perf_event_context *parent_ctx,
@@ -10608,7 +10640,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
* First allocate and initialize a context for the
* child.
*/
-
child_ctx = alloc_perf_context(parent_ctx->pmu, child);
if (!child_ctx)
return -ENOMEM;
@@ -10670,7 +10701,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
ret = inherit_task_group(event, parent, parent_ctx,
child, ctxn, &inherited_all);
if (ret)
- break;
+ goto out_unlock;
}
/*
@@ -10686,7 +10717,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
ret = inherit_task_group(event, parent, parent_ctx,
child, ctxn, &inherited_all);
if (ret)
- break;
+ goto out_unlock;
}
raw_spin_lock_irqsave(&parent_ctx->lock, flags);
@@ -10714,6 +10745,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
}
raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+out_unlock:
mutex_unlock(&parent_ctx->mutex);
perf_unpin_context(parent_ctx);
diff --git a/kernel/futex.c b/kernel/futex.c
index 229a744b1781..45858ec73941 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
{
struct hrtimer_sleeper timeout, *to = NULL;
struct rt_mutex_waiter rt_waiter;
- struct rt_mutex *pi_mutex = NULL;
struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
@@ -2899,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
+ if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
+ rt_mutex_unlock(&q.pi_state->pi_mutex);
/*
* Drop the reference to the pi state which
* the requeue_pi() code acquired for us.
@@ -2907,6 +2908,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
spin_unlock(q.lock_ptr);
}
} else {
+ struct rt_mutex *pi_mutex;
+
/*
* We have been woken up by futex_unlock_pi(), a timeout, or a
* signal. futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2930,18 +2933,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (res)
ret = (res < 0) ? res : 0;
+ /*
+ * If fixup_pi_state_owner() faulted and was unable to handle
+ * the fault, unlock the rt_mutex and return the fault to
+ * userspace.
+ */
+ if (ret && rt_mutex_owner(pi_mutex) == current)
+ rt_mutex_unlock(pi_mutex);
+
/* Unqueue and drop the lock. */
unqueue_me_pi(&q);
}
- /*
- * If fixup_pi_state_owner() faulted and was unable to handle the
- * fault, unlock the rt_mutex and return the fault to userspace.
- */
- if (ret == -EFAULT) {
- if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
- rt_mutex_unlock(pi_mutex);
- } else if (ret == -EINTR) {
+ if (ret == -EINTR) {
/*
* We've already been requeued, but cannot restart by calling
* futex_lock_pi() directly. We could restart this syscall, but
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index b56a558e406d..b118735fea9d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image)
ret = crypto_shash_final(desc, digest);
if (ret)
goto out_free_digest;
- ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
- sha_regions, sha_region_sz, 0);
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+ sha_regions, sha_region_sz, 0);
if (ret)
goto out_free_digest;
- ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
- digest, SHA256_DIGEST_SIZE, 0);
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+ digest, SHA256_DIGEST_SIZE, 0);
if (ret)
goto out_free_digest;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 4cef7e4706b0..799a8a452187 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image,
extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
-struct kexec_sha_region {
- unsigned long start;
- unsigned long len;
-};
-
+#include <linux/purgatory.h>
void kimage_file_post_load_cleanup(struct kimage *image);
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 7bc24d477805..c65f7989f850 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
*/
if (sem->count == 0)
break;
- if (signal_pending_state(state, current)) {
- ret = -EINTR;
- goto out;
- }
+ if (signal_pending_state(state, current))
+ goto out_nolock;
+
set_current_state(state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
@@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
}
/* got the lock */
sem->count = -1;
-out:
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
+
+out_nolock:
+ list_del(&waiter.list);
+ if (!list_empty(&sem->wait_list))
+ __rwsem_do_wake(sem, 1);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return -EINTR;
}
void __sched __down_write(struct rw_semaphore *sem)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 06123234f118..07e85e5229da 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
- lock_device_hotplug();
mem_hotplug_begin();
arch_remove_memory(align_start, align_size);
mem_hotplug_done();
- unlock_device_hotplug();
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
pgmap_radix_release(res);
@@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (error)
goto err_pfn_remap;
- lock_device_hotplug();
mem_hotplug_begin();
error = arch_add_memory(nid, align_start, align_size, true);
mem_hotplug_done();
- unlock_device_hotplug();
if (error)
goto err_add_memory;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 99b2c33a9fbc..a2ce59015642 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
*
* This function returns true if:
*
- * runtime / (deadline - t) > dl_runtime / dl_period ,
+ * runtime / (deadline - t) > dl_runtime / dl_deadline ,
*
* IOW we can't recycle current parameters.
*
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
* task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
*/
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
* of anything below microseconds resolution is actually fiction
* (but still we want to give the user that illusion >;).
*/
- left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+ left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
right = ((dl_se->deadline - t) >> DL_SCALE) *
(pi_se->dl_runtime >> DL_SCALE);
@@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
}
}
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+ return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
/*
* If the entity depleted all its runtime, and if we want it to sleep
* while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
* and try to activate it.
*
* Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
*/
- act = ns_to_ktime(dl_se->deadline);
+ act = ns_to_ktime(dl_next_period(dl_se));
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
@@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = dl_task_offline_migration(rq, p);
rf.cookie = lockdep_pin_lock(&rq->lock);
+ update_rq_clock(rq);
/*
* Now that the task has been migrated to the new RQ and we
@@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
timer->function = dl_task_timer;
}
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadline < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the beginning of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+ struct task_struct *p = dl_task_of(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+ if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+ dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+ if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+ return;
+ dl_se->dl_throttled = 1;
+ }
+}
+
static
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
{
@@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
__dequeue_dl_entity(dl_se);
}
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+ return dl_se->dl_deadline < dl_se->dl_period;
+}
+
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -948,6 +990,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
}
/*
+ * Check if a constrained deadline task was activated
+ * after the deadline but before the next period.
+ * If that is the case, the task will be throttled and
+ * the replenishment timer will be set to the next period.
+ */
+ if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+ dl_check_constrained_dl(&p->dl);
+
+ /*
* If p is throttled, we do nothing. In fact, if it exhausted
* its budget it needs a replenishment and, since it now is on
* its rq, the bandwidth timer callback (which clearly has not
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 7296b7308eca..f15fb2bdbc0d 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
* If the folding window started, make sure we start writing in the
* next idle-delta.
*/
- if (!time_before(jiffies, calc_load_update))
+ if (!time_before(jiffies, READ_ONCE(calc_load_update)))
idx++;
return idx & 1;
@@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
struct rq *this_rq = this_rq();
/*
- * If we're still before the sample window, we're done.
+ * If we're still before the pending sample window, we're done.
*/
+ this_rq->calc_load_update = READ_ONCE(calc_load_update);
if (time_before(jiffies, this_rq->calc_load_update))
return;
@@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
* accounted through the nohz accounting, so skip the entire deal and
* sync up for the next window.
*/
- this_rq->calc_load_update = calc_load_update;
if (time_before(jiffies, this_rq->calc_load_update + 10))
this_rq->calc_load_update += LOAD_FREQ;
}
@@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
*/
static void calc_global_nohz(void)
{
+ unsigned long sample_window;
long delta, active, n;
- if (!time_before(jiffies, calc_load_update + 10)) {
+ sample_window = READ_ONCE(calc_load_update);
+ if (!time_before(jiffies, sample_window + 10)) {
/*
* Catch-up, fold however many we are behind still
*/
- delta = jiffies - calc_load_update - 10;
+ delta = jiffies - sample_window - 10;
n = 1 + (delta / LOAD_FREQ);
active = atomic_long_read(&calc_load_tasks);
@@ -324,7 +326,7 @@ static void calc_global_nohz(void)
avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
- calc_load_update += n * LOAD_FREQ;
+ WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
}
/*
@@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
*/
void calc_global_load(unsigned long ticks)
{
+ unsigned long sample_window;
long active, delta;
- if (time_before(jiffies, calc_load_update + 10))
+ sample_window = READ_ONCE(calc_load_update);
+ if (time_before(jiffies, sample_window + 10))
return;
/*
@@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
avenrun[1] = calc_load(avenrun[1], EXP_5, active);
avenrun[2] = calc_load(avenrun[2], EXP_15, active);
- calc_load_update += LOAD_FREQ;
+ WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
/*
* In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 072cbc9b175d..c0168b7da1ea 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
+ WARN_ON_ONCE(!wq);
WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
timer->data != (unsigned long)dwork);
WARN_ON_ONCE(timer_pending(timer));
diff --git a/mm/gup.c b/mm/gup.c
index c74bad1bf6e8..04aa405350dc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1455,7 +1455,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
P4D_SHIFT, next, write, pages, nr))
return 0;
- } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr))
+ } else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
return 0;
} while (p4dp++, addr = next, addr != end);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 295479b792ec..6fa7208bcd56 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -125,9 +125,12 @@ void put_online_mems(void)
}
+/* Serializes write accesses to mem_hotplug.active_writer. */
+static DEFINE_MUTEX(memory_add_remove_lock);
+
void mem_hotplug_begin(void)
{
- assert_held_device_hotplug();
+ mutex_lock(&memory_add_remove_lock);
mem_hotplug.active_writer = current;
@@ -147,6 +150,7 @@ void mem_hotplug_done(void)
mem_hotplug.active_writer = NULL;
mutex_unlock(&mem_hotplug.lock);
memhp_lock_release();
+ mutex_unlock(&memory_add_remove_lock);
}
/* add this memory to iomem resource */
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 538998a137d2..9ac639499bd1 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
/**
* pcpu_get_pages - get temp pages array
- * @chunk: chunk of interest
*
* Returns pointer to array of pointers to struct page which can be indexed
* with pcpu_page_idx(). Note that there is only one array and accesses
@@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
* RETURNS:
* Pointer to temp pages array on success.
*/
-static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
+static struct page **pcpu_get_pages(void)
{
static struct page **pages;
size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
@@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
{
struct page **pages;
- pages = pcpu_get_pages(chunk);
+ pages = pcpu_get_pages();
if (!pages)
return -ENOMEM;
@@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
* successful population attempt so the temp pages array must
* be available now.
*/
- pages = pcpu_get_pages(chunk);
+ pages = pcpu_get_pages();
BUG_ON(!pages);
/* unmap and free */
diff --git a/mm/percpu.c b/mm/percpu.c
index 5696039b5c07..60a6488e9e6d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1011,8 +1011,11 @@ area_found:
mutex_unlock(&pcpu_alloc_mutex);
}
- if (chunk != pcpu_reserved_chunk)
+ if (chunk != pcpu_reserved_chunk) {
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_nr_empty_pop_pages -= occ_pages;
+ spin_unlock_irqrestore(&pcpu_lock, flags);
+ }
if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
pcpu_schedule_balance_work();
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 9b5bc86f96ad..b1ccb58ad397 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry)
{
struct swap_slots_cache *cache;
- BUG_ON(!swap_slot_cache_initialized);
-
cache = &get_cpu_var(swp_slots);
if (use_swap_slot_cache && cache->slots_ret) {
spin_lock_irq(&cache->free_lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0dd80222b20b..0b057628a7ba 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
if (fatal_signal_pending(current)) {
area->nr_pages = i;
- goto fail;
+ goto fail_no_warn;
}
if (node == NUMA_NO_NODE)
@@ -1709,6 +1709,7 @@ fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
(area->nr_pages*PAGE_SIZE), area->size);
+fail_no_warn:
vfree(area->addr);
return NULL;
}
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8970a2fd3b1a..f9492bccfd79 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -667,6 +667,7 @@ next:
z3fold_page_unlock(zhdr);
spin_lock(&pool->lock);
if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+ spin_unlock(&pool->lock);
atomic64_dec(&pool->pages_nr);
return 0;
}
diff --git a/net/atm/svc.c b/net/atm/svc.c
index db9794ec61d8..5589de7086af 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -318,7 +318,8 @@ out:
return error;
}
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
+static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
@@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
lock_sock(sk);
- error = svc_create(sock_net(sk), newsock, 0, 0);
+ error = svc_create(sock_net(sk), newsock, 0, kern);
if (error)
goto out;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a8e42cedf1db..b7c486752b3a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1320,7 +1320,8 @@ out_release:
return err;
}
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index f307b145ea54..507b80d59dec 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -301,7 +301,7 @@ done:
}
static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index aa1a814ceddc..ac3c650cb234 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -471,7 +471,8 @@ done:
return err;
}
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e4e9a2da1e7e..728e0c8dc8e7 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -627,7 +627,7 @@ done:
}
static int sco_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 236f34244dbe..013f2290bfa5 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -30,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook);
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ br_drop_fake_rtable(skb);
return netif_receive_skb(skb);
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 95087e6e8258..fa87fbd62bb7 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv,
}
-/* PF_BRIDGE/LOCAL_IN ************************************************/
-/* The packet is locally destined, which requires a real
- * dst_entry, so detach the fake one. On the way up, the
- * packet would pass through PRE_ROUTING again (which already
- * took place when the packet entered the bridge), but we
- * register an IPv4 PRE_ROUTING 'sabotage' hook that will
- * prevent this from happening. */
-static unsigned int br_nf_local_in(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- br_drop_fake_rtable(skb);
- return NF_ACCEPT;
-}
-
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -908,12 +893,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
.priority = NF_BR_PRI_BRNF,
},
{
- .hook = br_nf_local_in,
- .pf = NFPROTO_BRIDGE,
- .hooknum = NF_BR_LOCAL_IN,
- .priority = NF_BR_PRI_BRNF,
- },
- {
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8637b2b71f3d..7869ae3837ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev)
{
rtnl_lock();
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3945821e9c1f..65ea0ff4017c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -953,7 +953,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct kobject *kobj = &dev->_rx[i].kobj;
- if (!list_empty(&dev_net(dev)->exit_list))
+ if (!atomic_read(&dev_net(dev)->count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1371,7 +1371,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
- if (!list_empty(&dev_net(dev)->exit_list))
+ if (!atomic_read(&dev_net(dev)->count))
queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &(ndev->dev);
- if (!list_empty(&dev_net(ndev)->exit_list))
+ if (!atomic_read(&dev_net(ndev)->count))
dev_set_uevent_suppress(dev, 1);
kobject_get(&dev->kobj);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f3557958e9bf..cd4ba8c6b609 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3828,13 +3828,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
if (!skb_may_tx_timestamp(sk, false))
return;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- *skb_hwtstamps(skb) = *hwtstamps;
- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
- sock_put(sk);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ *skb_hwtstamps(skb) = *hwtstamps;
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+ sock_put(sk);
+ }
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
@@ -3893,7 +3894,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
{
struct sock *sk = skb->sk;
struct sock_exterr_skb *serr;
- int err;
+ int err = 1;
skb->wifi_acked_valid = 1;
skb->wifi_acked = acked;
@@ -3903,14 +3904,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- err = sock_queue_err_skb(sk, skb);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ err = sock_queue_err_skb(sk, skb);
+ sock_put(sk);
+ }
if (err)
kfree_skb(skb);
-
- sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
diff --git a/net/core/sock.c b/net/core/sock.c
index f6fd79f33097..a96d5f7a5734 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable);
/*
* Each address family might have different locking rules, so we have
- * one slock key per address family:
+ * one slock key per address family and separate keys for internal and
+ * userspace sockets.
*/
static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_kern_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
+static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
* locks is fast):
*/
+
+#define _sock_locks(x) \
+ x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
+ x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
+ x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
+ x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
+ x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
+ x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
+ x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
+ x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
+ x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
+ x "27" , x "28" , x "AF_CAN" , \
+ x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
+ x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
+ x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
+ x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
+ x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX"
+
static const char *const af_family_key_strings[AF_MAX+1] = {
- "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
- "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
- "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
- "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
- "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
- "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
- "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
- "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
- "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
- "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
- "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
- "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
- "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
- "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX"
+ _sock_locks("sk_lock-")
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
- "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
- "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
- "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
- "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
- "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
- "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
- "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
- "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
- "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
- "slock-27" , "slock-28" , "slock-AF_CAN" ,
- "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
- "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
- "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
- "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX"
+ _sock_locks("slock-")
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
- "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
- "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
- "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
- "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
- "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
- "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
- "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
- "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
- "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
- "clock-27" , "clock-28" , "clock-AF_CAN" ,
- "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
- "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
- "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
- "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX"
+ _sock_locks("clock-")
+};
+
+static const char *const af_family_kern_key_strings[AF_MAX+1] = {
+ _sock_locks("k-sk_lock-")
+};
+static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
+ _sock_locks("k-slock-")
+};
+static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
+ _sock_locks("k-clock-")
};
/*
@@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
* so split the lock classes by using a per-AF key:
*/
static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Take into consideration the size of the struct sk_buff overhead in the
* determination of these values, since that is non-constant across
@@ -1293,7 +1283,16 @@ lenout:
*/
static inline void sock_lock_init(struct sock *sk)
{
- sock_lock_init_class_and_name(sk,
+ if (sk->sk_kern_sock)
+ sock_lock_init_class_and_name(
+ sk,
+ af_family_kern_slock_key_strings[sk->sk_family],
+ af_family_kern_slock_keys + sk->sk_family,
+ af_family_kern_key_strings[sk->sk_family],
+ af_family_kern_keys + sk->sk_family);
+ else
+ sock_lock_init_class_and_name(
+ sk,
af_family_slock_key_strings[sk->sk_family],
af_family_slock_keys + sk->sk_family,
af_family_key_strings[sk->sk_family],
@@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
* why we need sk_prot_creator -acme
*/
sk->sk_prot = sk->sk_prot_creator = prot;
+ sk->sk_kern_sock = kern;
sock_lock_init(sk);
sk->sk_net_refcnt = kern ? 0 : 1;
if (likely(sk->sk_net_refcnt))
@@ -2277,7 +2277,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
}
EXPORT_SYMBOL(sock_no_socketpair);
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
return -EOPNOTSUPP;
}
@@ -2481,7 +2482,14 @@ void sock_init_data(struct socket *sock, struct sock *sk)
}
rwlock_init(&sk->sk_callback_lock);
- lockdep_set_class_and_name(&sk->sk_callback_lock,
+ if (sk->sk_kern_sock)
+ lockdep_set_class_and_name(
+ &sk->sk_callback_lock,
+ af_kern_callback_keys + sk->sk_family,
+ af_family_kern_clock_key_strings[sk->sk_family]);
+ else
+ lockdep_set_class_and_name(
+ &sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
af_family_clock_key_strings[sk->sk_family]);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index f053198e730c..5e3a7302f774 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
for (i = 0; i < hc->tx_seqbufc; i++)
kfree(hc->tx_seqbuf[i]);
hc->tx_seqbufc = 0;
+ dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 409d0cfd3447..b99168b0fabf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
- dccp_do_redirect(skb, sk);
+ if (!sock_owned_by_user(sk))
+ dccp_do_redirect(skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 233b57367758..d9b6a4e403e7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
if (type == NDISC_REDIRECT) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+ if (!sock_owned_by_user(sk)) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst)
- dst->ops->redirect(dst, sk, skb);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
goto out;
}
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e267e6f4c9a5..abd07a443219 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct dccp_request_sock *dreq = dccp_rsk(req);
bool own_req;
+ /* TCP/DCCP listeners became lockless.
+ * DCCP stores complex state in its request_sock, so we need
+ * a protection for them, now this code runs without being protected
+ * by the parent (listener) lock.
+ */
+ spin_lock_bh(&dreq->dreq_lock);
+
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
inet_rtx_syn_ack(sk, req);
}
/* Network Duplicate, discard packet */
- return NULL;
+ goto out;
}
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
@@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
req, &own_req);
- if (!child)
- goto listen_overflow;
-
- return inet_csk_complete_hashdance(sk, child, req, own_req);
+ if (child) {
+ child = inet_csk_complete_hashdance(sk, child, req, own_req);
+ goto out;
+ }
-listen_overflow:
- dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req);
- return NULL;
+out:
+ spin_unlock_bh(&dreq->dreq_lock);
+ return child;
}
EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req,
{
struct dccp_request_sock *dreq = dccp_rsk(req);
+ spin_lock_init(&dreq->dreq_lock);
inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
inet_rsk(req)->acked = 0;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index e6e79eda9763..7de5b40a5d0d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
return skb == NULL ? ERR_PTR(err) : skb;
}
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk, *newsk;
struct sk_buff *skb = NULL;
@@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
cb = DN_SKB_CB(skb);
sk->sk_ack_backlog--;
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
+ newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
if (newsk == NULL) {
release_sock(sk);
kfree_skb(skb);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 602d40f43687..6b1fc6e4278e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect);
* Accept a pending connection. The TCP layer now gives BSD semantics.
*/
-int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk1 = sock->sk;
int err = -EINVAL;
- struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+ struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
if (!sk2)
goto do_err;
@@ -1487,8 +1488,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
skb_set_inner_network_header(skb, nhoff);
+ }
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b4d5980ade3b..5e313c1ac94f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
/*
* This will accept the next outstanding connection.
*/
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 737ce826d7ec..7a3fd25e8913 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -966,7 +966,7 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9a89b8deafae..575e19dcc017 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -279,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
*/
void tcp_v4_mtu_reduced(struct sock *sk)
{
- struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
- u32 mtu = tcp_sk(sk)->mtu_info;
+ struct dst_entry *dst;
+ u32 mtu;
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+ mtu = tcp_sk(sk)->mtu_info;
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -428,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
- do_redirect(icmp_skb, sk);
+ if (!sock_owned_by_user(sk))
+ do_redirect(icmp_skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 40d893556e67..b2ab411c6d37 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk)
sk_mem_reclaim_partial(sk);
- if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
goto out;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
int event;
- if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !icsk->icsk_pending)
goto out;
if (time_after(icsk->icsk_timeout, jiffies)) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 04db40620ea6..a9a9553ee63d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -920,12 +920,12 @@ static int __init inet6_init(void)
err = register_pernet_subsys(&inet6_net_ops);
if (err)
goto register_pernet_fail;
- err = icmpv6_init();
- if (err)
- goto icmp_fail;
err = ip6_mr_init();
if (err)
goto ipmr_fail;
+ err = icmpv6_init();
+ if (err)
+ goto icmp_fail;
err = ndisc_init();
if (err)
goto ndisc_fail;
@@ -1061,10 +1061,10 @@ igmp_fail:
ndisc_cleanup();
ndisc_fail:
- ip6_mr_cleanup();
-ipmr_fail:
icmpv6_cleanup();
icmp_fail:
+ ip6_mr_cleanup();
+ipmr_fail:
unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e4266746e4a2..d4bf2c68a545 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -923,6 +923,8 @@ add:
ins = &rt->dst.rt6_next;
iter = *ins;
while (iter) {
+ if (iter->rt6i_metric > rt->rt6i_metric)
+ break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
fib6_purge_rt(iter, fn, info->nl_net);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 0838e6d01d2e..93e58a5e1837 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
skb_set_inner_network_header(skb, nhoff);
+ }
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 528b3c1f3fde..58f6288e9ba5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -768,13 +768,14 @@ slow_path:
* Fragment the datagram.
*/
- *prevhdr = NEXTHDR_FRAGMENT;
troom = rt->dst.dev->needed_tailroom;
/*
* Keep copying data until we run out.
*/
while (left > 0) {
+ u8 *fragnexthdr_offset;
+
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
@@ -819,6 +820,10 @@ slow_path:
*/
skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
/*
* Build fragment header.
*/
@@ -1385,7 +1390,7 @@ emsgsize:
if ((((length + fragheaderlen) > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, exthdrlen,
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 644ba59fbd9d..3d8a3b63b4fd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -485,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (!skb->ignore_df && skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
- if (skb->protocol == htons(ETH_P_IPV6))
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- else
+ } else {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ }
return -EMSGSIZE;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 229bfcc451ef..35c58b669ebd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3299,7 +3299,6 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + nla_total_size(4) /* RTA_OIF */
+ lwtunnel_get_encap_size(rt->dst.lwtstate);
nexthop_len *= rt->rt6i_nsiblings;
@@ -3323,7 +3322,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
}
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
- unsigned int *flags)
+ unsigned int *flags, bool skip_oif)
{
if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
*flags |= RTNH_F_LINKDOWN;
@@ -3336,7 +3335,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
- if (rt->dst.dev &&
+ /* not needed for multipath encoding b/c it has a rtnexthop struct */
+ if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
goto nla_put_failure;
@@ -3350,6 +3350,7 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* add multipath next hop */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
struct rtnexthop *rtnh;
@@ -3362,7 +3363,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
rtnh->rtnh_hops = 0;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
- if (rt6_nexthop_info(skb, rt, &flags) < 0)
+ if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
goto nla_put_failure;
rtnh->rtnh_flags = flags;
@@ -3515,7 +3516,7 @@ static int rt6_fill_node(struct net *net,
nla_nest_end(skb, mp);
} else {
- if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
+ if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
goto nla_put_failure;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 60a5295a7de6..49fa2e8c3fa9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
if (type == NDISC_REDIRECT) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+ if (!sock_owned_by_user(sk)) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst)
- dst->ops->redirect(dst, sk, skb);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
goto out;
}
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 81adc29a448d..8d77ad5cadaf 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -828,7 +828,8 @@ out:
* Wait for incoming connection
*
*/
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct irda_sock *new, *self = irda_sk(sk);
@@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
struct sk_buff *skb = NULL;
int err;
- err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
+ err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
if (err)
return err;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 89bbde1081ce..84de7b6326dc 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -938,7 +938,7 @@ done:
/* Accept a pending connection */
static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *nsk;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 06186d608a27..cb4fff785cbf 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
* @sock: Socket which connections arrive on.
* @newsock: Socket to move incoming connection to.
* @flags: User specified operational flags.
+ * @kern: If the socket is kernel internal
*
* Accept a new incoming connection.
* Returns 0 upon success, negative otherwise.
*/
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk, *newsk;
struct llc_sock *llc, *newllc;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 3818686182b2..33211f9a2656 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1288,7 +1288,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
/* fall through */
case NETDEV_CHANGE:
nh->nh_flags |= RTNH_F_LINKDOWN;
- ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+ if (event != NETDEV_UNREGISTER)
+ ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
break;
}
if (event == NETDEV_UNREGISTER)
@@ -2028,6 +2029,7 @@ static void mpls_net_exit(struct net *net)
for (index = 0; index < platform_labels; index++) {
struct mpls_route *rt = rtnl_dereference(platform_label[index]);
RCU_INIT_POINTER(platform_label[index], NULL);
+ mpls_notify_route(net, index, rt, NULL, NULL);
mpls_rt_free(rt);
}
rtnl_unlock();
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 4bbf4526b885..ebf16f7f9089 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -765,7 +765,8 @@ out_release:
return err;
}
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 879885b31cce..2ffb18e73df6 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
}
static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *new_sk;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 222bedcd9575..e81537991ddf 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
sock_put(sk);
}
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+ bool kern)
{
struct pep_sock *pn = pep_sk(sk), *newpn;
struct sock *newsk = NULL;
@@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
}
/* Create a new to-be-accepted sock */
- newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
+ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
+ kern);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index a6c8da3ee893..64634e3ec2fc 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -305,7 +305,7 @@ out:
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
struct sock *sk = sock->sk;
struct sock *newsk;
@@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
if (unlikely(sk->sk_state != TCP_LISTEN))
return -EINVAL;
- newsk = sk->sk_prot->accept(sk, flags, &err);
+ newsk = sk->sk_prot->accept(sk, flags, &err, kern);
if (!newsk)
return err;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 0e04dcceb1d4..1fa75ab7b733 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
+ put_net(conn->c_net);
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index ce3775abc6e7..1c38d2c7caa8 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ic->i_send_cq = NULL;
ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
rdsdebug("ib_create_cq send failed: %d\n", ret);
- goto out;
+ goto rds_ibdev_out;
}
ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
@@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ic->i_recv_cq = NULL;
ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
rdsdebug("ib_create_cq recv failed: %d\n", ret);
- goto out;
+ goto send_cq_out;
}
ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
if (ret) {
rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
if (ret) {
rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
/* XXX negotiate max send/recv with remote? */
@@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
if (ret) {
rdsdebug("rdma_create_qp failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
@@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent send failed\n");
- goto out;
+ goto qp_out;
}
ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
@@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent recv failed\n");
- goto out;
+ goto send_hdrs_dma_out;
}
ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
@@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_ack) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent ack failed\n");
- goto out;
+ goto recv_hdrs_dma_out;
}
ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
@@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_sends) {
ret = -ENOMEM;
rdsdebug("send allocation failed\n");
- goto out;
+ goto ack_dma_out;
}
ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
@@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_recvs) {
ret = -ENOMEM;
rdsdebug("recv allocation failed\n");
- goto out;
+ goto sends_out;
}
rds_ib_recv_init_ack(ic);
@@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
-out:
+ return ret;
+
+sends_out:
+ vfree(ic->i_sends);
+ack_dma_out:
+ ib_dma_free_coherent(dev, sizeof(struct rds_header),
+ ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+ ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+ sizeof(struct rds_header),
+ ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+ ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+ sizeof(struct rds_header),
+ ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+ rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+ if (!ib_destroy_cq(ic->i_recv_cq))
+ ic->i_recv_cq = NULL;
+send_cq_out:
+ if (!ib_destroy_cq(ic->i_send_cq))
+ ic->i_send_cq = NULL;
+rds_ibdev_out:
+ rds_ib_remove_conn(rds_ibdev, conn);
rds_ib_dev_put(rds_ibdev);
+
return ret;
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 39518ef7af4d..82d38ccf5e8b 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -147,7 +147,7 @@ struct rds_connection {
/* Protocol version */
unsigned int c_version;
- possible_net_t c_net;
+ struct net *c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
@@ -162,13 +162,13 @@ struct rds_connection {
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{
- return read_pnet(&conn->c_net);
+ return conn->c_net;
}
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{
- write_pnet(&conn->c_net, net);
+ conn->c_net = get_net(net);
}
#define RDS_FLAG_CONG_BITMAP 0x01
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index a973d3b4dff0..225690076773 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
* we do need to clean up the listen socket here.
*/
if (rtn->rds_tcp_listen_sock) {
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
+
rtn->rds_tcp_listen_sock = NULL;
- flush_work(&rtn->rds_tcp_accept_w);
+ rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
}
}
@@ -523,13 +524,13 @@ static void rds_tcp_kill_sock(struct net *net)
struct rds_tcp_connection *tc, *_tc;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
rtn->rds_tcp_listen_sock = NULL;
- flush_work(&rtn->rds_tcp_accept_w);
+ rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+ struct net *c_net = tc->t_cpath->cp_conn->c_net;
if (net != c_net || !tc->t_sock)
continue;
@@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net)
void *rds_tcp_listen_sock_def_readable(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
+
+ if (!lsock)
+ return NULL;
- return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+ return lsock->sk->sk_user_data;
}
static int rds_tcp_dev_event(struct notifier_block *this,
@@ -584,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+ struct net *c_net = tc->t_cpath->cp_conn->c_net;
if (net != c_net || !tc->t_sock)
continue;
@@ -638,19 +643,19 @@ static int rds_tcp_init(void)
goto out;
}
- ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
- if (ret) {
- pr_warn("could not register rds_tcp_dev_notifier\n");
+ ret = rds_tcp_recv_init();
+ if (ret)
goto out_slab;
- }
ret = register_pernet_subsys(&rds_tcp_net_ops);
if (ret)
- goto out_notifier;
+ goto out_recv;
- ret = rds_tcp_recv_init();
- if (ret)
+ ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+ if (ret) {
+ pr_warn("could not register rds_tcp_dev_notifier\n");
goto out_pernet;
+ }
rds_trans_register(&rds_tcp_transport);
@@ -660,9 +665,8 @@ static int rds_tcp_init(void)
out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
-out_notifier:
- if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
- pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_recv:
+ rds_tcp_recv_exit();
out_slab:
kmem_cache_destroy(rds_tcp_conn_slab);
out:
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 9a1cc8906576..56ea6620fcf9 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */
struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 67d0929c7d3d..507678853e6c 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+ ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
if (ret < 0)
goto out;
@@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk)
* before it has been accepted and the accepter has set up their
* data_ready.. we only want to queue listen work for our listening
* socket
+ *
+ * (*ready)() may be null if we are racing with netns delete, and
+ * the listen socket is being torn down.
*/
if (sk->sk_state == TCP_LISTEN)
rds_tcp_accept_work(sk);
@@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
out:
read_unlock_bh(&sk->sk_callback_lock);
- ready(sk);
+ if (ready)
+ ready(sk);
}
struct socket *rds_tcp_listen_init(struct net *net)
@@ -271,7 +275,7 @@ out:
return NULL;
}
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
{
struct sock *sk;
@@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock)
/* wait for accepts to stop and close the socket */
flush_workqueue(rds_wq);
+ flush_work(acceptor);
sock_release(sock);
}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index b8a1df2c9785..4a9729257023 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -871,7 +871,8 @@ out_release:
return err;
}
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 9f4cfa25af7c..18b2ad8be8e2 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -420,6 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
u16 skew)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ enum rxrpc_call_state state;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int ix;
rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
@@ -434,14 +435,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
_proto("Rx DATA %%%u { #%u f=%02x }",
sp->hdr.serial, seq, sp->hdr.flags);
- if (call->state >= RXRPC_CALL_COMPLETE)
+ state = READ_ONCE(call->state);
+ if (state >= RXRPC_CALL_COMPLETE)
return;
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
- if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
- call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+ state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
!rxrpc_receiving_reply(call))
return;
@@ -650,6 +652,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
+ bool wake = false;
u32 rwind = ntohl(ackinfo->rwind);
_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
@@ -657,9 +660,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
rwind, ntohl(ackinfo->jumbo_max));
- if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
- rwind = RXRPC_RXTX_BUFF_SIZE - 1;
- call->tx_winsize = rwind;
+ if (call->tx_winsize != rwind) {
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+ if (rwind > call->tx_winsize)
+ wake = true;
+ call->tx_winsize = rwind;
+ }
+
if (call->cong_ssthresh > rwind)
call->cong_ssthresh = rwind;
@@ -673,6 +681,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
spin_unlock_bh(&peer->lock);
_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
}
+
+ if (wake)
+ wake_up(&call->waitq);
}
/*
@@ -799,7 +810,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
return rxrpc_proto_abort("AK0", call, 0);
/* Ignore ACKs unless we are or have just been transmitting. */
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -940,7 +951,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
struct rxrpc_call *call)
{
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
break;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 6491ca46a03f..3e2f1a8e9c5b 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -527,7 +527,7 @@ try_again:
msg->msg_namelen = len;
}
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_ACCEPTING:
ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
break;
@@ -640,7 +640,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
mutex_lock(&call->user_mutex);
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
case RXRPC_CALL_SERVER_RECV_REQUEST:
case RXRPC_CALL_SERVER_ACK_REQUEST:
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index bc2d3dcff9de..97ab214ca411 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -488,6 +488,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
+ enum rxrpc_call_state state;
enum rxrpc_command cmd;
struct rxrpc_call *call;
unsigned long user_call_ID = 0;
@@ -526,13 +527,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
return PTR_ERR(call);
/* ... and we have the call lock. */
} else {
- ret = -EBUSY;
- if (call->state == RXRPC_CALL_UNINITIALISED ||
- call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
- call->state == RXRPC_CALL_SERVER_PREALLOC ||
- call->state == RXRPC_CALL_SERVER_SECURING ||
- call->state == RXRPC_CALL_SERVER_ACCEPTING)
+ switch (READ_ONCE(call->state)) {
+ case RXRPC_CALL_UNINITIALISED:
+ case RXRPC_CALL_CLIENT_AWAIT_CONN:
+ case RXRPC_CALL_SERVER_PREALLOC:
+ case RXRPC_CALL_SERVER_SECURING:
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ ret = -EBUSY;
goto error_release_sock;
+ default:
+ break;
+ }
ret = mutex_lock_interruptible(&call->user_mutex);
release_sock(&rx->sk);
@@ -542,10 +547,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
}
}
+ state = READ_ONCE(call->state);
_debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
+ call->debug_id, call->user_call_ID, state, call->conn);
- if (call->state >= RXRPC_CALL_COMPLETE) {
+ if (state >= RXRPC_CALL_COMPLETE) {
/* it's too late for this call */
ret = -ESHUTDOWN;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
@@ -555,12 +561,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else if (rxrpc_is_client_call(call) &&
- call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
/* request phase complete for this client call */
ret = -EPROTO;
} else if (rxrpc_is_service_call(call) &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY) {
/* Reply phase not begun or not complete for service call. */
ret = -EPROTO;
} else {
@@ -605,14 +611,21 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
_debug("CALL %d USR %lx ST %d on CONN %p",
call->debug_id, call->user_call_ID, call->state, call->conn);
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -ESHUTDOWN; /* it's too late for this call */
- } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
- ret = -EPROTO; /* request phase complete for this client call */
- } else {
+ switch (READ_ONCE(call->state)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+ break;
+ case RXRPC_CALL_COMPLETE:
+ read_lock_bh(&call->state_lock);
+ ret = -call->error;
+ read_unlock_bh(&call->state_lock);
+ break;
+ default:
+ /* Request phase complete for this client call */
+ ret = -EPROTO;
+ break;
}
mutex_unlock(&call->user_mutex);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index ab8062909962..f9bb43c25697 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
+ if (!tb[TCA_CONNMARK_PARMS])
+ return -EINVAL;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(tn, parm->index, a, bind)) {
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 3b7074e23024..c736627f8f4a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
nla_put_failure:
- rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 063baac5b9fe..961ee59f696a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
/* Create and initialize a new sk for the socket to be returned by accept(). */
static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
- struct sctp_association *asoc)
+ struct sctp_association *asoc,
+ bool kern)
{
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
struct ipv6_txoptions *opt;
- newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
+ newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
if (!newsk)
goto out;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1b6d4574d2b0..989a900383b5 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb)
/* Create and initialize a new sk for the socket returned by accept(). */
static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
- struct sctp_association *asoc)
+ struct sctp_association *asoc,
+ bool kern)
{
struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
- sk->sk_prot, 0);
+ sk->sk_prot, kern);
struct inet_sock *newinet;
if (!newsk)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6f0a9be50f50..0f378ea2ae38 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
* descriptor will be returned from accept() to represent the newly
* formed association.
*/
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
{
struct sctp_sock *sp;
struct sctp_endpoint *ep;
@@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
*/
asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
- newsk = sp->pf->create_accept_sk(sk, asoc);
+ newsk = sp->pf->create_accept_sk(sk, asoc, kern);
if (!newsk) {
error = -ENOMEM;
goto out;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 85837ab90e89..093803786eac 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -944,7 +944,7 @@ out:
}
static int smc_accept(struct socket *sock, struct socket *new_sock,
- int flags)
+ int flags, bool kern)
{
struct sock *sk = sock->sk, *nsk;
DECLARE_WAITQUEUE(wait, current);
diff --git a/net/socket.c b/net/socket.c
index 2c1e8677ff2d..e034fe4164be 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1506,7 +1506,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
if (err)
goto out_fd;
- err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+ err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
if (err < 0)
goto out_fd;
@@ -1731,6 +1731,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
msg.msg_iocb = NULL;
+ msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, flags);
@@ -3238,7 +3239,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = sock->ops->accept(sock, *newsock, flags);
+ err = sock->ops->accept(sock, *newsock, flags, true);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 81cd31acf690..3b332b395045 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct ib_cq *sendcq, *recvcq;
int rc;
- max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+ max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+ RPCRDMA_MAX_SEND_SGES);
if (max_sge < RPCRDMA_MIN_SEND_SGES) {
pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
return -ENOMEM;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 43e4045e72bc..7130e73bd42c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+ bool kern);
static void tipc_sk_timeout(unsigned long data);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
@@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
*
* Returns 0 on success, errno otherwise
*/
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+ bool kern)
{
struct sock *new_sk, *sk = sock->sk;
struct sk_buff *buf;
@@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
if (res)
goto exit;
security_sk_clone(sock->sk, new_sock->sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ee37b390260a..928691c43408 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int);
+static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
@@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
set_bit(SOCK_PASSSEC, &new->flags);
}
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct sock *tsk;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9192ead66751..9f770f33c100 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1250,7 +1250,8 @@ out:
return err;
}
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *listener;
int err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index fd28a49dbe8f..8b911c29860e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
return rc;
}
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct sock *newsk;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0806dccdf507..236cbbc0ab9c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1243,7 +1243,7 @@ static inline int policy_to_flow_dir(int dir)
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
- const struct flowi *fl)
+ const struct flowi *fl, u16 family)
{
struct xfrm_policy *pol;
@@ -1251,8 +1251,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
- bool match = xfrm_selector_match(&pol->selector, fl,
- sk->sk_family);
+ bool match = xfrm_selector_match(&pol->selector, fl, family);
int err = 0;
if (match) {
@@ -2239,7 +2238,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
- pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@@ -2518,7 +2517,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = NULL;
sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
- pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+ pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
@@ -3069,6 +3068,11 @@ static int __net_init xfrm_net_init(struct net *net)
{
int rv;
+ /* Initialize the per-net locks here */
+ spin_lock_init(&net->xfrm.xfrm_state_lock);
+ spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
rv = xfrm_statistics_init(net);
if (rv < 0)
goto out_statistics;
@@ -3085,11 +3089,6 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out;
- /* Initialize the per-net locks here */
- spin_lock_init(&net->xfrm.xfrm_state_lock);
- spin_lock_init(&net->xfrm.xfrm_policy_lock);
- mutex_init(&net->xfrm.xfrm_cfg_mutex);
-
return 0;
out:
diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 000000000000..067427259820
--- /dev/null
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+ struct pt_regs regs;
+ __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 581278c58488..8f74ed8e7237 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -30,8 +30,8 @@ import fcntl
import resource
import struct
import re
+import subprocess
from collections import defaultdict
-from time import sleep
VMX_EXIT_REASONS = {
'EXCEPTION_NMI': 0,
@@ -225,6 +225,7 @@ IOCTL_NUMBERS = {
'RESET': 0x00002403,
}
+
class Arch(object):
"""Encapsulates global architecture specific data.
@@ -255,12 +256,14 @@ class Arch(object):
return ArchX86(SVM_EXIT_REASONS)
return
+
class ArchX86(Arch):
def __init__(self, exit_reasons):
self.sc_perf_evt_open = 298
self.ioctl_numbers = IOCTL_NUMBERS
self.exit_reasons = exit_reasons
+
class ArchPPC(Arch):
def __init__(self):
self.sc_perf_evt_open = 319
@@ -275,12 +278,14 @@ class ArchPPC(Arch):
self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
self.exit_reasons = {}
+
class ArchA64(Arch):
def __init__(self):
self.sc_perf_evt_open = 241
self.ioctl_numbers = IOCTL_NUMBERS
self.exit_reasons = AARCH64_EXIT_REASONS
+
class ArchS390(Arch):
def __init__(self):
self.sc_perf_evt_open = 331
@@ -316,6 +321,61 @@ def parse_int_list(list_string):
return integers
+def get_pid_from_gname(gname):
+ """Fuzzy function to convert guest name to QEMU process pid.
+
+ Returns a list of potential pids, can be empty if no match found.
+ Throws an exception on processing errors.
+
+ """
+ pids = []
+ try:
+ child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
+ stdout=subprocess.PIPE)
+ except:
+ raise Exception
+ for line in child.stdout:
+ line = line.lstrip().split(' ', 1)
+ # perform a sanity check before calling the more expensive
+ # function to possibly extract the guest name
+ if ' -name ' in line[1] and gname == get_gname_from_pid(line[0]):
+ pids.append(int(line[0]))
+ child.stdout.close()
+
+ return pids
+
+
+def get_gname_from_pid(pid):
+ """Returns the guest name for a QEMU process pid.
+
+ Extracts the guest name from the QEMU comma line by processing the '-name'
+ option. Will also handle names specified out of sequence.
+
+ """
+ name = ''
+ try:
+ line = open('/proc/{}/cmdline'.format(pid), 'rb').read().split('\0')
+ parms = line[line.index('-name') + 1].split(',')
+ while '' in parms:
+ # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in
+ # ['foo', '', 'bar'], which we revert here
+ idx = parms.index('')
+ parms[idx - 1] += ',' + parms[idx + 1]
+ del parms[idx:idx+2]
+ # the '-name' switch allows for two ways to specify the guest name,
+ # where the plain name overrides the name specified via 'guest='
+ for arg in parms:
+ if '=' not in arg:
+ name = arg
+ break
+ if arg[:6] == 'guest=':
+ name = arg[6:]
+ except (ValueError, IOError, IndexError):
+ pass
+
+ return name
+
+
def get_online_cpus():
"""Returns a list of cpu id integers."""
with open('/sys/devices/system/cpu/online') as cpu_list:
@@ -342,6 +402,7 @@ def get_filters():
libc = ctypes.CDLL('libc.so.6', use_errno=True)
syscall = libc.syscall
+
class perf_event_attr(ctypes.Structure):
"""Struct that holds the necessary data to set up a trace event.
@@ -370,6 +431,7 @@ class perf_event_attr(ctypes.Structure):
self.size = ctypes.sizeof(self)
self.read_format = PERF_FORMAT_GROUP
+
def perf_event_open(attr, pid, cpu, group_fd, flags):
"""Wrapper for the sys_perf_evt_open() syscall.
@@ -395,6 +457,7 @@ PERF_FORMAT_GROUP = 1 << 3
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
+
class Group(object):
"""Represents a perf event group."""
@@ -427,6 +490,7 @@ class Group(object):
struct.unpack(read_format,
os.read(self.events[0].fd, length))))
+
class Event(object):
"""Represents a performance event and manages its life cycle."""
def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
@@ -510,6 +574,7 @@ class Event(object):
"""Resets the count of the trace event in the kernel."""
fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
+
class TracepointProvider(object):
"""Data provider for the stats class.
@@ -551,6 +616,7 @@ class TracepointProvider(object):
def setup_traces(self):
"""Creates all event and group objects needed to be able to retrieve
data."""
+ fields = self.get_available_fields()
if self._pid > 0:
# Fetch list of all threads of the monitored pid, as qemu
# starts a thread for each vcpu.
@@ -561,7 +627,7 @@ class TracepointProvider(object):
# The constant is needed as a buffer for python libs, std
# streams and other files that the script opens.
- newlim = len(groupids) * len(self._fields) + 50
+ newlim = len(groupids) * len(fields) + 50
try:
softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
@@ -577,7 +643,7 @@ class TracepointProvider(object):
for groupid in groupids:
group = Group()
- for name in self._fields:
+ for name in fields:
tracepoint = name
tracefilter = None
match = re.match(r'(.*)\((.*)\)', name)
@@ -650,13 +716,23 @@ class TracepointProvider(object):
ret[name] += val
return ret
+ def reset(self):
+ """Reset all field counters"""
+ for group in self.group_leaders:
+ for event in group.events:
+ event.reset()
+
+
class DebugfsProvider(object):
"""Provides data from the files that KVM creates in the kvm debugfs
folder."""
def __init__(self):
self._fields = self.get_available_fields()
+ self._baseline = {}
self._pid = 0
self.do_read = True
+ self.paths = []
+ self.reset()
def get_available_fields(self):
""""Returns a list of available fields.
@@ -673,6 +749,7 @@ class DebugfsProvider(object):
@fields.setter
def fields(self, fields):
self._fields = fields
+ self.reset()
@property
def pid(self):
@@ -690,10 +767,11 @@ class DebugfsProvider(object):
self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
else:
- self.paths = ['']
+ self.paths = []
self.do_read = True
+ self.reset()
- def read(self):
+ def read(self, reset=0):
"""Returns a dict with format:'file name / field -> current value'."""
results = {}
@@ -701,10 +779,22 @@ class DebugfsProvider(object):
if not self.do_read:
return results
- for path in self.paths:
+ paths = self.paths
+ if self._pid == 0:
+ paths = []
+ for entry in os.walk(PATH_DEBUGFS_KVM):
+ for dir in entry[1]:
+ paths.append(dir)
+ for path in paths:
for field in self._fields:
- results[field] = results.get(field, 0) \
- + self.read_field(field, path)
+ value = self.read_field(field, path)
+ key = path + field
+ if reset:
+ self._baseline[key] = value
+ if self._baseline.get(key, -1) == -1:
+ self._baseline[key] = value
+ results[field] = (results.get(field, 0) + value -
+ self._baseline.get(key, 0))
return results
@@ -718,6 +808,12 @@ class DebugfsProvider(object):
except IOError:
return 0
+ def reset(self):
+ """Reset field counters"""
+ self._baseline = {}
+ self.read(1)
+
+
class Stats(object):
"""Manages the data providers and the data they provide.
@@ -753,14 +849,20 @@ class Stats(object):
for provider in self.providers:
provider.pid = self._pid_filter
+ def reset(self):
+ self.values = {}
+ for provider in self.providers:
+ provider.reset()
+
@property
def fields_filter(self):
return self._fields_filter
@fields_filter.setter
def fields_filter(self, fields_filter):
- self._fields_filter = fields_filter
- self.update_provider_filters()
+ if fields_filter != self._fields_filter:
+ self._fields_filter = fields_filter
+ self.update_provider_filters()
@property
def pid_filter(self):
@@ -768,9 +870,10 @@ class Stats(object):
@pid_filter.setter
def pid_filter(self, pid):
- self._pid_filter = pid
- self.values = {}
- self.update_provider_pid()
+ if pid != self._pid_filter:
+ self._pid_filter = pid
+ self.values = {}
+ self.update_provider_pid()
def get(self):
"""Returns a dict with field -> (value, delta to last value) of all
@@ -778,23 +881,26 @@ class Stats(object):
for provider in self.providers:
new = provider.read()
for key in provider.fields:
- oldval = self.values.get(key, (0, 0))
+ oldval = self.values.get(key, (0, 0))[0]
newval = new.get(key, 0)
- newdelta = None
- if oldval is not None:
- newdelta = newval - oldval[0]
+ newdelta = newval - oldval
self.values[key] = (newval, newdelta)
return self.values
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
+DELAY_INITIAL = 0.25
+DELAY_REGULAR = 3.0
+MAX_GUEST_NAME_LEN = 48
+MAX_REGEX_LEN = 44
+DEFAULT_REGEX = r'^[^\(]*$'
+
class Tui(object):
"""Instruments curses to draw a nice text ui."""
def __init__(self, stats):
self.stats = stats
self.screen = None
- self.drilldown = False
self.update_drilldown()
def __enter__(self):
@@ -809,7 +915,14 @@ class Tui(object):
# return from C start_color() is ignorable.
try:
curses.start_color()
- except:
+ except curses.error:
+ pass
+
+ # Hide cursor in extra statement as some monochrome terminals
+ # might support hiding but not colors.
+ try:
+ curses.curs_set(0)
+ except curses.error:
pass
curses.use_default_colors()
@@ -827,36 +940,60 @@ class Tui(object):
def update_drilldown(self):
"""Sets or removes a filter that only allows fields without braces."""
if not self.stats.fields_filter:
- self.stats.fields_filter = r'^[^\(]*$'
+ self.stats.fields_filter = DEFAULT_REGEX
- elif self.stats.fields_filter == r'^[^\(]*$':
+ elif self.stats.fields_filter == DEFAULT_REGEX:
self.stats.fields_filter = None
def update_pid(self, pid):
"""Propagates pid selection to stats object."""
self.stats.pid_filter = pid
- def refresh(self, sleeptime):
- """Refreshes on-screen data."""
+ def refresh_header(self, pid=None):
+ """Refreshes the header."""
+ if pid is None:
+ pid = self.stats.pid_filter
self.screen.erase()
- if self.stats.pid_filter > 0:
- self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
- .format(self.stats.pid_filter),
- curses.A_BOLD)
+ gname = get_gname_from_pid(pid)
+ if gname:
+ gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
+ if len(gname) > MAX_GUEST_NAME_LEN
+ else gname))
+ if pid > 0:
+ self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
+ .format(pid, gname), curses.A_BOLD)
else:
self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+ if self.stats.fields_filter and self.stats.fields_filter \
+ != DEFAULT_REGEX:
+ regex = self.stats.fields_filter
+ if len(regex) > MAX_REGEX_LEN:
+ regex = regex[:MAX_REGEX_LEN] + '...'
+ self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
self.screen.addstr(2, 1, 'Event')
self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
len('Total'), 'Total')
- self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
+ self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 -
+ len('%Total'), '%Total')
+ self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 + 8 -
len('Current'), 'Current')
+ self.screen.addstr(4, 1, 'Collecting data...')
+ self.screen.refresh()
+
+ def refresh_body(self, sleeptime):
row = 3
+ self.screen.move(row, 0)
+ self.screen.clrtobot()
stats = self.stats.get()
+
def sortkey(x):
if stats[x][1]:
return (-stats[x][1], -stats[x][0])
else:
return (0, -stats[x][0])
+ total = 0.
+ for val in stats.values():
+ total += val[0]
for key in sorted(stats.keys(), key=sortkey):
if row >= self.screen.getmaxyx()[0]:
@@ -869,6 +1006,8 @@ class Tui(object):
col += LABEL_WIDTH
self.screen.addstr(row, col, '%10d' % (values[0],))
col += NUMBER_WIDTH
+ self.screen.addstr(row, col, '%7.1f' % (values[0] * 100 / total,))
+ col += 7
if values[1] is not None:
self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
row += 1
@@ -893,20 +1032,24 @@ class Tui(object):
regex = self.screen.getstr()
curses.noecho()
if len(regex) == 0:
+ self.stats.fields_filter = DEFAULT_REGEX
+ self.refresh_header()
return
try:
re.compile(regex)
self.stats.fields_filter = regex
+ self.refresh_header()
return
except re.error:
continue
- def show_vm_selection(self):
+ def show_vm_selection_by_pid(self):
"""Draws PID selection mask.
Asks for a pid until a valid pid or 0 has been entered.
"""
+ msg = ''
while True:
self.screen.erase()
self.screen.addstr(0, 0,
@@ -915,6 +1058,7 @@ class Tui(object):
self.screen.addstr(1, 0,
'This might limit the shown data to the trace '
'statistics.')
+ self.screen.addstr(5, 0, msg)
curses.echo()
self.screen.addstr(3, 0, "Pid [0 or pid]: ")
@@ -922,60 +1066,128 @@ class Tui(object):
curses.noecho()
try:
- pid = int(pid)
-
- if pid == 0:
- self.update_pid(pid)
- break
- else:
- if not os.path.isdir(os.path.join('/proc/', str(pid))):
+ if len(pid) > 0:
+ pid = int(pid)
+ if pid != 0 and not os.path.isdir(os.path.join('/proc/',
+ str(pid))):
+ msg = '"' + str(pid) + '": Not a running process'
continue
- else:
- self.update_pid(pid)
- break
+ else:
+ pid = 0
+ self.refresh_header(pid)
+ self.update_pid(pid)
+ break
except ValueError:
+ msg = '"' + str(pid) + '": Not a valid pid'
continue
+ def show_vm_selection_by_guest_name(self):
+ """Draws guest selection mask.
+
+ Asks for a guest name until a valid guest name or '' is entered.
+
+ """
+ msg = ''
+ while True:
+ self.screen.erase()
+ self.screen.addstr(0, 0,
+ 'Show statistics for specific guest.',
+ curses.A_BOLD)
+ self.screen.addstr(1, 0,
+ 'This might limit the shown data to the trace '
+ 'statistics.')
+ self.screen.addstr(5, 0, msg)
+ curses.echo()
+ self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
+ gname = self.screen.getstr()
+ curses.noecho()
+
+ if not gname:
+ self.refresh_header(0)
+ self.update_pid(0)
+ break
+ else:
+ pids = []
+ try:
+ pids = get_pid_from_gname(gname)
+ except:
+ msg = '"' + gname + '": Internal error while searching, ' \
+ 'use pid filter instead'
+ continue
+ if len(pids) == 0:
+ msg = '"' + gname + '": Not an active guest'
+ continue
+ if len(pids) > 1:
+ msg = '"' + gname + '": Multiple matches found, use pid ' \
+ 'filter instead'
+ continue
+ self.refresh_header(pids[0])
+ self.update_pid(pids[0])
+ break
+
def show_stats(self):
"""Refreshes the screen and processes user input."""
- sleeptime = 0.25
+ sleeptime = DELAY_INITIAL
+ self.refresh_header()
while True:
- self.refresh(sleeptime)
+ self.refresh_body(sleeptime)
curses.halfdelay(int(sleeptime * 10))
- sleeptime = 3
+ sleeptime = DELAY_REGULAR
try:
char = self.screen.getkey()
if char == 'x':
- self.drilldown = not self.drilldown
+ self.refresh_header()
self.update_drilldown()
+ sleeptime = DELAY_INITIAL
if char == 'q':
break
+ if char == 'c':
+ self.stats.fields_filter = DEFAULT_REGEX
+ self.refresh_header(0)
+ self.update_pid(0)
+ sleeptime = DELAY_INITIAL
if char == 'f':
self.show_filter_selection()
+ sleeptime = DELAY_INITIAL
+ if char == 'g':
+ self.show_vm_selection_by_guest_name()
+ sleeptime = DELAY_INITIAL
if char == 'p':
- self.show_vm_selection()
+ self.show_vm_selection_by_pid()
+ sleeptime = DELAY_INITIAL
+ if char == 'r':
+ self.refresh_header()
+ self.stats.reset()
+ sleeptime = DELAY_INITIAL
except KeyboardInterrupt:
break
except curses.error:
continue
+
def batch(stats):
"""Prints statistics in a key, value format."""
- s = stats.get()
- time.sleep(1)
- s = stats.get()
- for key in sorted(s.keys()):
- values = s[key]
- print '%-42s%10d%10d' % (key, values[0], values[1])
+ try:
+ s = stats.get()
+ time.sleep(1)
+ s = stats.get()
+ for key in sorted(s.keys()):
+ values = s[key]
+ print '%-42s%10d%10d' % (key, values[0], values[1])
+ except KeyboardInterrupt:
+ pass
+
def log(stats):
"""Prints statistics as reiterating key block, multiple value blocks."""
keys = sorted(stats.get().iterkeys())
+
def banner():
for k in keys:
print '%s' % k,
print
+
def statline():
s = stats.get()
for k in keys:
@@ -984,11 +1196,15 @@ def log(stats):
line = 0
banner_repeat = 20
while True:
- time.sleep(1)
- if line % banner_repeat == 0:
- banner()
- statline()
- line += 1
+ try:
+ time.sleep(1)
+ if line % banner_repeat == 0:
+ banner()
+ statline()
+ line += 1
+ except KeyboardInterrupt:
+ break
+
def get_options():
"""Returns processed program arguments."""
@@ -1009,6 +1225,16 @@ Requirements:
CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
the large number of files that are possibly opened.
+
+Interactive Commands:
+ c clear filter
+ f filter by regular expression
+ g filter by guest name
+ p filter by PID
+ q quit
+ x toggle reporting of stats for individual child trace events
+ r reset stats
+Press any other key to refresh statistics immediately.
"""
class PlainHelpFormatter(optparse.IndentedHelpFormatter):
@@ -1018,6 +1244,22 @@ Requirements:
else:
return ""
+ def cb_guest_to_pid(option, opt, val, parser):
+ try:
+ pids = get_pid_from_gname(val)
+ except:
+ raise optparse.OptionValueError('Error while searching for guest '
+ '"{}", use "-p" to specify a pid '
+ 'instead'.format(val))
+ if len(pids) == 0:
+ raise optparse.OptionValueError('No guest by the name "{}" '
+ 'found'.format(val))
+ if len(pids) > 1:
+ raise optparse.OptionValueError('Multiple processes found (pids: '
+ '{}) - use "-p" to specify a pid '
+ 'instead'.format(" ".join(pids)))
+ parser.values.pid = pids[0]
+
optparser = optparse.OptionParser(description=description_text,
formatter=PlainHelpFormatter())
optparser.add_option('-1', '--once', '--batch',
@@ -1051,15 +1293,24 @@ Requirements:
help='fields to display (regex)',
)
optparser.add_option('-p', '--pid',
- action='store',
- default=0,
- type=int,
- dest='pid',
- help='restrict statistics to pid',
- )
+ action='store',
+ default=0,
+ type='int',
+ dest='pid',
+ help='restrict statistics to pid',
+ )
+ optparser.add_option('-g', '--guest',
+ action='callback',
+ type='string',
+ dest='pid',
+ metavar='GUEST',
+ help='restrict statistics to guest by name',
+ callback=cb_guest_to_pid,
+ )
(options, _) = optparser.parse_args(sys.argv)
return options
+
def get_providers(options):
"""Returns a list of data providers depending on the passed options."""
providers = []
@@ -1073,6 +1324,7 @@ def get_providers(options):
return providers
+
def check_access(options):
"""Exits if the current user can't access all needed directories."""
if not os.path.exists('/sys/kernel/debug'):
@@ -1086,8 +1338,8 @@ def check_access(options):
"Also ensure, that the kvm modules are loaded.\n")
sys.exit(1)
- if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
- or not options.debugfs):
+ if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
+ not options.debugfs):
sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
"when using the option -t (default).\n"
"If it is enabled, make {0} readable by the "
@@ -1098,10 +1350,11 @@ def check_access(options):
sys.stderr.write("Falling back to debugfs statistics!\n")
options.debugfs = True
- sleep(5)
+ time.sleep(5)
return options
+
def main():
options = get_options()
options = check_access(options)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index b92a153d7115..109431bdc63c 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -18,11 +18,33 @@ state transitions such as guest mode entry and exit.
This tool is useful for observing guest behavior from the host perspective.
Often conclusions about performance or buggy behavior can be drawn from the
output.
+While running in regular mode, use any of the keys listed in section
+'Interactive Commands' below.
+Use batch and logging modes for scripting purposes.
The set of KVM kernel module trace events may be specific to the kernel version
or architecture. It is best to check the KVM kernel module source code for the
meaning of events.
+INTERACTIVE COMMANDS
+--------------------
+[horizontal]
+*c*:: clear filter
+
+*f*:: filter by regular expression
+
+*g*:: filter by guest name
+
+*p*:: filter by PID
+
+*q*:: quit
+
+*r*:: reset stats
+
+*x*:: toggle reporting of stats for child trace events
+
+Press any other key to refresh statistics immediately.
+
OPTIONS
-------
-1::
@@ -46,6 +68,10 @@ OPTIONS
--pid=<pid>::
limit statistics to one virtual machine (pid)
+-g<guest>::
+--guest=<guest_name>::
+ limit statistics to one virtual machine (guest name)
+
-f<fields>::
--fields=<fields>::
fields to display (regex)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 70e389bc4af7..9b4d8ba22fed 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols)
/* Last entry */
if (curr->end == curr->start)
- curr->end = roundup(curr->start, 4096);
+ curr->end = roundup(curr->start, 4096) + 4096;
}
void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4b498265dae6..67531f47781b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,12 +1,14 @@
LIBDIR := ../../../lib
BPFOBJ := $(LIBDIR)/bpf/bpf.o
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
TEST_PROGS := test_kmod.sh
+all: $(TEST_GEN_PROGS)
+
.PHONY: all clean force
# force a rebuild of BPFOBJ when its dependencies are updated
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index e1f5b9eea1e8..d1555e4240c0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8,6 +8,8 @@
* License as published by the Free Software Foundation.
*/
+#include <asm/types.h>
+#include <linux/types.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -4583,10 +4585,12 @@ static bool is_admin(void)
cap_flag_value_t sysadmin = CAP_CLEAR;
const cap_value_t cap_val = CAP_SYS_ADMIN;
+#ifdef CAP_IS_SUPPORTED
if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
perror("cap_get_flag");
return false;
}
+#endif
caps = cap_get_proc();
if (!caps) {
perror("cap_get_proc");
diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h
index d828bfb6ef2d..54064ced9e95 100644
--- a/tools/testing/selftests/powerpc/include/vsx_asm.h
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -16,56 +16,56 @@
*/
FUNC_START(load_vsx)
li r5,0
- lxvx vs20,r5,r3
+ lxvd2x vs20,r5,r3
addi r5,r5,16
- lxvx vs21,r5,r3
+ lxvd2x vs21,r5,r3
addi r5,r5,16
- lxvx vs22,r5,r3
+ lxvd2x vs22,r5,r3
addi r5,r5,16
- lxvx vs23,r5,r3
+ lxvd2x vs23,r5,r3
addi r5,r5,16
- lxvx vs24,r5,r3
+ lxvd2x vs24,r5,r3
addi r5,r5,16
- lxvx vs25,r5,r3
+ lxvd2x vs25,r5,r3
addi r5,r5,16
- lxvx vs26,r5,r3
+ lxvd2x vs26,r5,r3
addi r5,r5,16
- lxvx vs27,r5,r3
+ lxvd2x vs27,r5,r3
addi r5,r5,16
- lxvx vs28,r5,r3
+ lxvd2x vs28,r5,r3
addi r5,r5,16
- lxvx vs29,r5,r3
+ lxvd2x vs29,r5,r3
addi r5,r5,16
- lxvx vs30,r5,r3
+ lxvd2x vs30,r5,r3
addi r5,r5,16
- lxvx vs31,r5,r3
+ lxvd2x vs31,r5,r3
blr
FUNC_END(load_vsx)
FUNC_START(store_vsx)
li r5,0
- stxvx vs20,r5,r3
+ stxvd2x vs20,r5,r3
addi r5,r5,16
- stxvx vs21,r5,r3
+ stxvd2x vs21,r5,r3
addi r5,r5,16
- stxvx vs22,r5,r3
+ stxvd2x vs22,r5,r3
addi r5,r5,16
- stxvx vs23,r5,r3
+ stxvd2x vs23,r5,r3
addi r5,r5,16
- stxvx vs24,r5,r3
+ stxvd2x vs24,r5,r3
addi r5,r5,16
- stxvx vs25,r5,r3
+ stxvd2x vs25,r5,r3
addi r5,r5,16
- stxvx vs26,r5,r3
+ stxvd2x vs26,r5,r3
addi r5,r5,16
- stxvx vs27,r5,r3
+ stxvd2x vs27,r5,r3
addi r5,r5,16
- stxvx vs28,r5,r3
+ stxvd2x vs28,r5,r3
addi r5,r5,16
- stxvx vs29,r5,r3
+ stxvd2x vs29,r5,r3
addi r5,r5,16
- stxvx vs30,r5,r3
+ stxvd2x vs30,r5,r3
addi r5,r5,16
- stxvx vs31,r5,r3
+ stxvd2x vs31,r5,r3
blr
FUNC_END(store_vsx)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 571b64a01c50..8d1da1af4b09 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
return ret;
}
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
@@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
- u32 reg;
- if (!its)
- return;
-
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
+ /* Commands are only processed when the ITS is enabled. */
+ if (!its->enabled)
return;
- }
- its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
@@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u64 reg;
+
+ if (!its)
+ return;
+
+ mutex_lock(&its->cmd_lock);
+
+ reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+ reg = ITS_CMD_OFFSET(reg);
+ if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+ mutex_unlock(&its->cmd_lock);
+ return;
+ }
+ its->cwriter = reg;
+
+ vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
@@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
*regptr = reg;
}
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+ struct vgic_its *its,
+ gpa_t addr, unsigned int len)
+{
+ u32 reg = 0;
+
+ mutex_lock(&its->cmd_lock);
+ if (its->creadr == its->cwriter)
+ reg |= GITS_CTLR_QUIESCENT;
+ if (its->enabled)
+ reg |= GITS_CTLR_ENABLE;
+ mutex_unlock(&its->cmd_lock);
+
+ return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ mutex_lock(&its->cmd_lock);
+
+ its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+ /*
+ * Try to process any pending commands. This function bails out early
+ * if the ITS is disabled or no commands have been queued.
+ */
+ vgic_its_process_commands(kvm, its);
+
+ mutex_unlock(&its->cmd_lock);
+}
+
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 3654b4c835ef..2a5db1352722 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
+ struct kvm_vcpu *requester_vcpu;
spin_lock(&irq->irq_lock);
+
+ /*
+ * The vcpu parameter here can mean multiple things depending on how
+ * this function is called; when handling a trap from the kernel it
+ * depends on the GIC version, and these functions are also called as
+ * part of save/restore from userspace.
+ *
+ * Therefore, we have to figure out the requester in a reliable way.
+ *
+ * When accessing VGIC state from user space, the requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
+ */
+ requester_vcpu = kvm_arm_get_running_vcpu();
+
/*
* If this virtual IRQ was written into a list register, we
* have to make sure the CPU that runs the VCPU thread has
- * synced back LR state to the struct vgic_irq. We can only
- * know this for sure, when either this irq is not assigned to
- * anyone's AP list anymore, or the VCPU thread is not
- * running on any CPUs.
+ * synced back the LR state to the struct vgic_irq.
*
- * In the opposite case, we know the VCPU thread may be on its
- * way back from the guest and still has to sync back this
- * IRQ, so we release and re-acquire the spin_lock to let the
- * other thread sync back the IRQ.
+ * As long as the conditions below are true, we know the VCPU thread
+ * may be on its way back from the guest (we kicked the VCPU thread in
+ * vgic_change_active_prepare) and still has to sync back this IRQ,
+ * so we release and re-acquire the spin_lock to let the other thread
+ * sync back the IRQ.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index edc6ee2dc852..be0f4c3e0142 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
* way, so we force SRE to 1 to demonstrate this to the guest.
+ * Also, we don't support any form of IRQ/FIQ bypass.
* This goes with the spec allowing the value to be RAO/WI.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+ vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+ ICC_SRE_EL1_DFB |
+ ICC_SRE_EL1_SRE);
vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
} else {
vgic_v3->vgic_sre = 0;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..a8d540398bbd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_srcu(&kvm->irq_srcu);
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- kvm->buses[bus_idx]->ioeventfd_count--;
+ if (kvm->buses[bus_idx])
+ kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3bcc9990adf7..cc30d01a56be 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
struct kvm_kernel_irq_routing_entry *ei;
+ int r;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
@@ -153,20 +153,19 @@ static int setup_routing_entry(struct kvm *kvm,
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
- return r;
+ return -EINVAL;
e->gsi = ue->gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
- goto out;
+ return r;
if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
- r = 0;
-out:
- return r;
+
+ return 0;
}
void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a17d78759727..357e67cba32e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -727,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- for (i = 0; i < KVM_NR_BUSES; i++)
- kvm_io_bus_destroy(kvm->buses[i]);
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ if (kvm->buses[i])
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm->buses[i] = NULL;
+ }
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -1016,8 +1019,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
old_memslots = install_new_memslots(kvm, as_id, slots);
- /* slot was deleted or moved, clear iommu mapping */
- kvm_iommu_unmap_pages(kvm, &old);
/* From this point no new shadow pages pointing to a deleted,
* or moved, memslot will be created.
*
@@ -1052,21 +1053,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
kvm_free_memslot(kvm, &old, &new);
kvfree(old_memslots);
-
- /*
- * IOMMU mapping: New slots need to be mapped. Old slots need to be
- * un-mapped and re-mapped if their base changes. Since base change
- * unmapping is handled above with slot deletion, mapping alone is
- * needed here. Anything else the iommu might care about for existing
- * slots (size changes, userspace addr changes and read-only flag
- * changes) is disallowed above, so any other attribute changes getting
- * here can be skipped.
- */
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
- r = kvm_iommu_map_pages(kvm, &new);
- return r;
- }
-
return 0;
out_slots:
@@ -2363,7 +2349,7 @@ static int kvm_vcpu_fault(struct vm_fault *vmf)
else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
page = virt_to_page(vcpu->arch.pio_data);
#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
#endif
@@ -2932,6 +2918,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
return 1;
+#ifdef CONFIG_KVM_MMIO
+ case KVM_CAP_COALESCED_MMIO:
+ return KVM_COALESCED_MMIO_PAGE_OFFSET;
+#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
@@ -2981,7 +2971,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
}
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
case KVM_REGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;
@@ -3079,8 +3069,11 @@ static long kvm_vm_ioctl(struct file *filp,
routing.nr * sizeof(*entries)))
goto out_free_irq_routing;
}
+ /* avoid races with KVM_CREATE_IRQCHIP on x86 */
+ mutex_lock(&kvm->lock);
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
+ mutex_unlock(&kvm->lock);
out_free_irq_routing:
vfree(entries);
break;
@@ -3173,7 +3166,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
kvm = kvm_create_vm(type);
if (IS_ERR(kvm))
return PTR_ERR(kvm);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
r = kvm_coalesced_mmio_init(kvm);
if (r < 0) {
kvm_put_kvm(kvm);
@@ -3226,7 +3219,7 @@ static long kvm_dev_ioctl(struct file *filp,
#ifdef CONFIG_X86
r += PAGE_SIZE; /* pio data page */
#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
r += PAGE_SIZE; /* coalesced mmio ring page */
#endif
break;
@@ -3474,6 +3467,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3491,6 +3486,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3541,6 +3538,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3553,6 +3552,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
+ if (!bus)
+ return -ENOMEM;
+
/* exclude ioeventfd which is limited by maximum fd */
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
@@ -3572,37 +3574,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int i, r;
+ int i;
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- r = -ENOENT;
+ if (!bus)
+ return;
+
for (i = 0; i < bus->dev_count; i++)
if (bus->range[i].dev == dev) {
- r = 0;
break;
}
- if (r)
- return r;
+ if (i == bus->dev_count)
+ return;
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
+ if (!new_bus) {
+ pr_err("kvm: failed to shrink bus, removing it completely\n");
+ goto broken;
+ }
memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
new_bus->dev_count--;
memcpy(new_bus->range + i, bus->range + i + 1,
(new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+broken:
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
- return r;
+ return;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3615,6 +3621,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ if (!bus)
+ goto out_unlock;
dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
if (dev_idx < 0)