aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/00-INDEX3
-rw-r--r--Documentation/virtual/kvm/amd-memory-encryption.rst247
-rw-r--r--Documentation/virtual/kvm/api.txt54
-rw-r--r--Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt187
-rw-r--r--Documentation/virtual/kvm/cpuid.txt4
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile4
-rw-r--r--arch/arm/include/asm/kvm_emulate.h2
-rw-r--r--arch/arm/include/asm/kvm_host.h2
-rw-r--r--arch/arm/include/asm/kvm_hyp.h3
-rw-r--r--arch/arm/include/asm/kvm_mmu.h99
-rw-r--r--arch/arm/include/asm/pgtable.h4
-rw-r--r--arch/arm/kvm/hyp/switch.c1
-rw-r--r--arch/arm/kvm/hyp/tlb.c1
-rw-r--r--arch/arm64/include/asm/assembler.h21
-rw-r--r--arch/arm64/include/asm/cacheflush.h7
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h1
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h36
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h4
-rw-r--r--arch/arm64/kvm/guest.c15
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c1
-rw-r--r--arch/arm64/kvm/hyp/switch.c1
-rw-r--r--arch/arm64/kvm/hyp/tlb.c1
-rw-r--r--arch/arm64/mm/cache.S32
-rw-r--r--arch/blackfin/include/uapi/asm/poll.h19
-rw-r--r--arch/cris/arch-v10/drivers/gpio.c2
-rw-r--r--arch/cris/arch-v10/drivers/sync_serial.c8
-rw-r--r--arch/cris/arch-v32/drivers/sync_serial.c10
-rw-r--r--arch/frv/include/uapi/asm/poll.h19
-rw-r--r--arch/ia64/kernel/perfmon.c2
-rw-r--r--arch/m68k/include/uapi/asm/poll.h19
-rw-r--r--arch/mips/include/uapi/asm/poll.h19
-rw-r--r--arch/mips/kernel/rtlx.c4
-rw-r--r--arch/mips/kvm/Kconfig1
-rw-r--r--arch/mips/kvm/mips.c67
-rw-r--r--arch/nios2/boot/dts/3c120_devboard.dts16
-rw-r--r--arch/nios2/configs/10m50_defconfig1
-rw-r--r--arch/nios2/configs/3c120_defconfig1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h6
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h14
-rw-r--r--arch/powerpc/include/asm/kvm_host.h8
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h4
-rw-r--r--arch/powerpc/include/asm/opal-api.h1
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h6
-rw-r--r--arch/powerpc/include/asm/xive.h3
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/pci-common.c5
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kvm/Kconfig3
-rw-r--r--arch/powerpc/kvm/book3s.c24
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c38
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c70
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S231
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S4
-rw-r--r--arch/powerpc/kvm/book3s_pr.c20
-rw-r--r--arch/powerpc/kvm/book3s_xive.c109
-rw-r--r--arch/powerpc/kvm/book3s_xive.h15
-rw-r--r--arch/powerpc/kvm/booke.c51
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c36
-rw-r--r--arch/powerpc/kvm/powerpc.c200
-rw-r--r--arch/powerpc/kvm/timing.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/backing_ops.c8
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c10
-rw-r--r--arch/powerpc/platforms/cell/spufs/hw_ops.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c2
-rw-r--r--arch/powerpc/sysdev/xive/native.c18
-rw-r--r--arch/s390/include/asm/bitops.h5
-rw-r--r--arch/s390/include/asm/css_chars.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h126
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/diag.c1
-rw-r--r--arch/s390/kvm/interrupt.c288
-rw-r--r--arch/s390/kvm/kvm-s390.c209
-rw-r--r--arch/s390/kvm/kvm-s390.h22
-rw-r--r--arch/s390/kvm/priv.c38
-rw-r--r--arch/s390/kvm/sigp.c18
-rw-r--r--arch/s390/kvm/vsie.c91
-rw-r--r--arch/s390/mm/gmap.c44
-rw-r--r--arch/sparc/include/uapi/asm/poll.h28
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S3
-rw-r--r--arch/x86/hyperv/hv_init.c123
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/hardirq.h3
-rw-r--r--arch/x86/include/asm/irq_vectors.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h22
-rw-r--r--arch/x86/include/asm/mshyperv.h32
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/pat.h2
-rw-r--r--arch/x86/include/asm/svm.h3
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h27
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h4
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c66
-rw-r--r--arch/x86/kernel/cpu/mcheck/dev-mcelog.c4
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c6
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/irq.c9
-rw-r--r--arch/x86/kernel/kvm.c49
-rw-r--r--arch/x86/kvm/Kconfig8
-rw-r--r--arch/x86/kvm/cpuid.c22
-rw-r--r--arch/x86/kvm/emulate.c62
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/lapic.c25
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c26
-rw-r--r--arch/x86/kvm/mmu_audit.c2
-rw-r--r--arch/x86/kvm/svm.c1199
-rw-r--r--arch/x86/kvm/vmx.c756
-rw-r--r--arch/x86/kvm/vmx_shadow_fields.h77
-rw-r--r--arch/x86/kvm/x86.c338
-rw-r--r--arch/x86/kvm/x86.h33
-rw-r--r--arch/x86/mm/pat.c19
-rw-r--r--arch/xtensa/include/asm/kasan.h2
-rw-r--r--arch/xtensa/include/uapi/asm/poll.h21
-rw-r--r--block/bfq-iosched.c107
-rw-r--r--block/blk-core.c11
-rw-r--r--block/blk-wbt.c10
-rw-r--r--block/bsg.c4
-rw-r--r--crypto/af_alg.c16
-rw-r--r--drivers/acpi/acpi_dbg.c4
-rw-r--r--drivers/android/binder.c4
-rw-r--r--drivers/bluetooth/hci_vhci.c4
-rw-r--r--drivers/char/apm-emulation.c2
-rw-r--r--drivers/char/dsp56k.c2
-rw-r--r--drivers/char/dtlk.c6
-rw-r--r--drivers/char/hpet.c2
-rw-r--r--drivers/char/ipmi/bt-bmc.c4
-rw-r--r--drivers/char/ipmi/ipmi_devintf.c2
-rw-r--r--drivers/char/ipmi/ipmi_watchdog.c2
-rw-r--r--drivers/char/pcmcia/cm4040_cs.c4
-rw-r--r--drivers/char/ppdev.c2
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/char/rtc.c2
-rw-r--r--drivers/char/snsc.c4
-rw-r--r--drivers/char/sonypi.c2
-rw-r--r--drivers/char/tpm/tpm_vtpm_proxy.c6
-rw-r--r--drivers/char/virtio_console.c8
-rw-r--r--drivers/char/xillybus/xillybus_core.c12
-rw-r--r--drivers/crypto/ccp/Kconfig12
-rw-r--r--drivers/crypto/ccp/Makefile1
-rw-r--r--drivers/crypto/ccp/psp-dev.c805
-rw-r--r--drivers/crypto/ccp/psp-dev.h83
-rw-r--r--drivers/crypto/ccp/sp-dev.c35
-rw-r--r--drivers/crypto/ccp/sp-dev.h28
-rw-r--r--drivers/crypto/ccp/sp-pci.c52
-rw-r--r--drivers/dma-buf/dma-buf.c26
-rw-r--r--drivers/dma-buf/sync_file.c2
-rw-r--r--drivers/firewire/core-cdev.c4
-rw-r--r--drivers/firewire/nosy.c4
-rw-r--r--drivers/gpio/gpiolib.c4
-rw-r--r--drivers/gpu/drm/drm_file.c2
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c10
-rw-r--r--drivers/gpu/vga/vgaarb.c2
-rw-r--r--drivers/hid/hid-debug.c4
-rw-r--r--drivers/hid/hid-roccat.c4
-rw-r--r--drivers/hid/hid-sensor-custom.c2
-rw-r--r--drivers/hid/hidraw.c4
-rw-r--r--drivers/hid/uhid.c2
-rw-r--r--drivers/hid/usbhid/hiddev.c4
-rw-r--r--drivers/hsi/clients/cmt_speech.c4
-rw-r--r--drivers/hv/hv_utils_transport.c4
-rw-r--r--drivers/iio/buffer/industrialio-buffer-dma.c4
-rw-r--r--drivers/iio/industrialio-buffer.c6
-rw-r--r--drivers/iio/industrialio-event.c6
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/ucma.c2
-rw-r--r--drivers/infiniband/core/user_mad.c4
-rw-r--r--drivers/infiniband/core/uverbs_main.c2
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c8
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c5
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c2
-rw-r--r--drivers/input/evdev.c6
-rw-r--r--drivers/input/input.c2
-rw-r--r--drivers/input/joydev.c4
-rw-r--r--drivers/input/misc/hp_sdc_rtc.c2
-rw-r--r--drivers/input/misc/uinput.c2
-rw-r--r--drivers/input/mousedev.c4
-rw-r--r--drivers/input/serio/serio_raw.c4
-rw-r--r--drivers/input/serio/userio.c2
-rw-r--r--drivers/isdn/capi/capi.c6
-rw-r--r--drivers/isdn/divert/divert_procfs.c4
-rw-r--r--drivers/isdn/hardware/eicon/divamnt.c4
-rw-r--r--drivers/isdn/hardware/eicon/divasi.c10
-rw-r--r--drivers/isdn/hardware/eicon/divasmain.c4
-rw-r--r--drivers/isdn/hardware/eicon/divasproc.c2
-rw-r--r--drivers/isdn/hysdn/hysdn_proclog.c2
-rw-r--r--drivers/isdn/i4l/isdn_common.c12
-rw-r--r--drivers/isdn/i4l/isdn_ppp.c8
-rw-r--r--drivers/isdn/mISDN/socket.c5
-rw-r--r--drivers/isdn/mISDN/timerdev.c4
-rw-r--r--drivers/leds/uleds.c2
-rw-r--r--drivers/macintosh/smu.c2
-rw-r--r--drivers/macintosh/via-pmu.c2
-rw-r--r--drivers/mailbox/mailbox-test.c2
-rw-r--r--drivers/md/bcache/alloc.c4
-rw-r--r--drivers/md/bcache/bcache.h9
-rw-r--r--drivers/md/bcache/btree.c9
-rw-r--r--drivers/md/bcache/journal.c52
-rw-r--r--drivers/md/bcache/super.c25
-rw-r--r--drivers/md/bcache/sysfs.c34
-rw-r--r--drivers/md/bcache/util.h2
-rw-r--r--drivers/md/bcache/writeback.c9
-rw-r--r--drivers/md/bcache/writeback.h3
-rw-r--r--drivers/md/dm-ioctl.c2
-rw-r--r--drivers/md/md.c6
-rw-r--r--drivers/media/cec/cec-api.c8
-rw-r--r--drivers/media/common/saa7146/saa7146_fops.c6
-rw-r--r--drivers/media/common/siano/smsdvb-debugfs.c2
-rw-r--r--drivers/media/common/videobuf2/videobuf2-core.c30
-rw-r--r--drivers/media/common/videobuf2/videobuf2-v4l2.c8
-rw-r--r--drivers/media/dvb-core/dmxdev.c14
-rw-r--r--drivers/media/dvb-core/dvb_ca_en50221.c4
-rw-r--r--drivers/media/dvb-core/dvb_frontend.c2
-rw-r--r--drivers/media/firewire/firedtv-ci.c2
-rw-r--r--drivers/media/i2c/saa6588.c2
-rw-r--r--drivers/media/media-devnode.c2
-rw-r--r--drivers/media/pci/bt8xx/bttv-driver.c22
-rw-r--r--drivers/media/pci/cx18/cx18-fileops.c16
-rw-r--r--drivers/media/pci/ddbridge/ddbridge-core.c4
-rw-r--r--drivers/media/pci/ivtv/ivtv-fileops.c16
-rw-r--r--drivers/media/pci/meye/meye.c2
-rw-r--r--drivers/media/pci/saa7164/saa7164-encoder.c6
-rw-r--r--drivers/media/pci/saa7164/saa7164-vbi.c2
-rw-r--r--drivers/media/pci/ttpci/av7110_av.c10
-rw-r--r--drivers/media/pci/ttpci/av7110_ca.c4
-rw-r--r--drivers/media/pci/zoran/zoran_driver.c16
-rw-r--r--drivers/media/platform/fsl-viu.c4
-rw-r--r--drivers/media/platform/s5p-mfc/s5p_mfc.c8
-rw-r--r--drivers/media/platform/soc_camera/soc_camera.c4
-rw-r--r--drivers/media/platform/vivid/vivid-radio-rx.c2
-rw-r--r--drivers/media/platform/vivid/vivid-radio-tx.c2
-rw-r--r--drivers/media/radio/radio-cadet.c4
-rw-r--r--drivers/media/radio/radio-si476x.c6
-rw-r--r--drivers/media/radio/radio-wl1273.c4
-rw-r--r--drivers/media/radio/si470x/radio-si470x-common.c4
-rw-r--r--drivers/media/radio/wl128x/fmdrv_v4l2.c2
-rw-r--r--drivers/media/rc/lirc_dev.c12
-rw-r--r--drivers/media/usb/cpia2/cpia2_core.c4
-rw-r--r--drivers/media/usb/cx231xx/cx231xx-417.c4
-rw-r--r--drivers/media/usb/cx231xx/cx231xx-video.c10
-rw-r--r--drivers/media/usb/gspca/gspca.c12
-rw-r--r--drivers/media/usb/hdpvr/hdpvr-video.c4
-rw-r--r--drivers/media/usb/pvrusb2/pvrusb2-v4l2.c6
-rw-r--r--drivers/media/usb/stkwebcam/stk-webcam.c4
-rw-r--r--drivers/media/usb/tm6000/tm6000-video.c14
-rw-r--r--drivers/media/v4l2-core/v4l2-ctrls.c2
-rw-r--r--drivers/media/v4l2-core/v4l2-dev.c2
-rw-r--r--drivers/media/v4l2-core/v4l2-mem2mem.c14
-rw-r--r--drivers/media/v4l2-core/v4l2-subdev.c4
-rw-r--r--drivers/media/v4l2-core/videobuf-core.c10
-rw-r--r--drivers/mfd/ab8500-debugfs.c2
-rw-r--r--drivers/misc/cxl/file.c4
-rw-r--r--drivers/misc/hpilo.c4
-rw-r--r--drivers/misc/lis3lv02d/lis3lv02d.c2
-rw-r--r--drivers/misc/mei/main.c12
-rw-r--r--drivers/misc/mic/cosm/cosm_scif_server.c16
-rw-r--r--drivers/misc/mic/cosm_client/cosm_scif_client.c4
-rw-r--r--drivers/misc/mic/scif/scif_api.c24
-rw-r--r--drivers/misc/mic/vop/vop_vringh.c8
-rw-r--r--drivers/misc/ocxl/file.c4
-rw-r--r--drivers/misc/phantom.c4
-rw-r--r--drivers/misc/vmw_vmci/vmci_host.c2
-rw-r--r--drivers/net/ieee802154/ca8210.c4
-rw-r--r--drivers/net/ppp/ppp_generic.c8
-rw-r--r--drivers/net/ppp/pppoe.c6
-rw-r--r--drivers/net/ppp/pptp.c6
-rw-r--r--drivers/net/tap.c10
-rw-r--r--drivers/net/tun.c12
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00debug.c2
-rw-r--r--drivers/pci/switch/switchtec.c6
-rw-r--r--drivers/platform/chrome/chromeos_laptop.c22
-rw-r--r--drivers/platform/chrome/cros_ec_debugfs.c2
-rw-r--r--drivers/platform/chrome/cros_ec_lpc.c44
-rw-r--r--drivers/platform/chrome/cros_ec_proto.c8
-rw-r--r--drivers/platform/chrome/cros_ec_sysfs.c2
-rw-r--r--drivers/platform/goldfish/goldfish_pipe.c8
-rw-r--r--drivers/platform/mellanox/mlxreg-hotplug.c2
-rw-r--r--drivers/platform/x86/mlx-platform.c372
-rw-r--r--drivers/platform/x86/sony-laptop.c2
-rw-r--r--drivers/pps/pps.c2
-rw-r--r--drivers/ptp/ptp_chardev.c2
-rw-r--r--drivers/rapidio/devices/rio_mport_cdev.c2
-rw-r--r--drivers/rpmsg/qcom_smd.c2
-rw-r--r--drivers/rpmsg/rpmsg_char.c4
-rw-r--r--drivers/rtc/rtc-dev.c2
-rw-r--r--drivers/s390/block/dasd_eer.c2
-rw-r--r--drivers/s390/char/monreader.c4
-rw-r--r--drivers/s390/char/sclp_early.c3
-rw-r--r--drivers/scsi/iscsi_tcp.c14
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_ctl.c2
-rw-r--r--drivers/scsi/sg.c12
-rw-r--r--drivers/soc/qcom/qmi_interface.c3
-rw-r--r--drivers/staging/comedi/comedi_fops.c4
-rw-r--r--drivers/staging/comedi/drivers/serial2002.c4
-rw-r--r--drivers/staging/fwserial/fwserial.c2
-rw-r--r--drivers/staging/greybus/tools/loopback_test.c4
-rw-r--r--drivers/staging/ipx/af_ipx.c6
-rw-r--r--drivers/staging/irda/net/af_irda.c22
-rw-r--r--drivers/staging/irda/net/irnet/irnet_ppp.c8
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c7
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c2
-rw-r--r--drivers/staging/media/bcm2048/radio-bcm2048.c2
-rw-r--r--drivers/staging/most/cdev/cdev.c4
-rw-r--r--drivers/staging/most/video/video.c2
-rw-r--r--drivers/staging/speakup/speakup_soft.c2
-rw-r--r--drivers/target/iscsi/iscsi_target_login.c18
-rw-r--r--drivers/tty/n_gsm.c6
-rw-r--r--drivers/tty/n_hdlc.c8
-rw-r--r--drivers/tty/n_r3964.c4
-rw-r--r--drivers/tty/n_tty.c16
-rw-r--r--drivers/tty/pty.c4
-rw-r--r--drivers/tty/tty_io.c14
-rw-r--r--drivers/tty/tty_ldisc.c4
-rw-r--r--drivers/tty/vt/vc_screen.c2
-rw-r--r--drivers/uio/uio.c2
-rw-r--r--drivers/usb/class/cdc-wdm.c8
-rw-r--r--drivers/usb/class/usblp.c4
-rw-r--r--drivers/usb/class/usbtmc.c4
-rw-r--r--drivers/usb/core/devices.c2
-rw-r--r--drivers/usb/core/devio.c6
-rw-r--r--drivers/usb/gadget/function/f_fs.c8
-rw-r--r--drivers/usb/gadget/function/f_hid.c4
-rw-r--r--drivers/usb/gadget/function/f_printer.c4
-rw-r--r--drivers/usb/gadget/legacy/inode.c6
-rw-r--r--drivers/usb/misc/iowarrior.c8
-rw-r--r--drivers/usb/misc/ldusb.c6
-rw-r--r--drivers/usb/misc/legousbtower.c6
-rw-r--r--drivers/usb/mon/mon_bin.c2
-rw-r--r--drivers/vfio/virqfd.c8
-rw-r--r--drivers/vhost/net.c11
-rw-r--r--drivers/vhost/vhost.c10
-rw-r--r--drivers/virt/fsl_hypervisor.c2
-rw-r--r--drivers/xen/evtchn.c6
-rw-r--r--drivers/xen/mcelog.c2
-rw-r--r--drivers/xen/pvcalls-front.c14
-rw-r--r--drivers/xen/pvcalls-front.h2
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c2
-rw-r--r--fs/cachefiles/daemon.c6
-rw-r--r--fs/coda/psdev.c6
-rw-r--r--fs/debugfs/file.c4
-rw-r--r--fs/dlm/lowcomms.c7
-rw-r--r--fs/dlm/plock.c2
-rw-r--r--fs/dlm/user.c2
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/eventfd.c18
-rw-r--r--fs/eventpoll.c63
-rw-r--r--fs/fcntl.c14
-rw-r--r--fs/fuse/dev.c8
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/kernfs/file.c4
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c6
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c6
-rw-r--r--fs/orangefs/devorangefs-req.c2
-rw-r--r--fs/pipe.c22
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/proc_namespace.c4
-rw-r--r--fs/select.c10
-rw-r--r--fs/signalfd.c4
-rw-r--r--fs/timerfd.c2
-rw-r--r--fs/userfaultfd.c16
-rw-r--r--include/kvm/arm_arch_timer.h2
-rw-r--r--include/kvm/arm_vgic.h13
-rw-r--r--include/linux/kvm_host.h14
-rw-r--r--include/linux/net.h8
-rw-r--r--include/linux/poll.h27
-rw-r--r--include/linux/psp-sev.h606
-rw-r--r--include/linux/scif.h16
-rw-r--r--include/media/videobuf2-core.h6
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/sock.h2
-rw-r--r--include/uapi/asm-generic/poll.h40
-rw-r--r--include/uapi/linux/eventpoll.h33
-rw-r--r--include/uapi/linux/kvm.h90
-rw-r--r--include/uapi/linux/psp-sev.h142
-rw-r--r--ipc/mqueue.c4
-rw-r--r--kernel/configs/kvm_guest.config1
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/events/ring_buffer.c2
-rw-r--r--kernel/printk/printk.c6
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/time/posix-clock.c2
-rw-r--r--kernel/trace/ring_buffer.c4
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--mm/memcontrol.c4
-rw-r--r--mm/swapfile.c4
-rw-r--r--net/9p/trans_fd.c26
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/common.c8
-rw-r--r--net/atm/pvc.c5
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c4
-rw-r--r--net/batman-adv/icmp_socket.c2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/bluetooth/af_bluetooth.c16
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/sco.c5
-rw-r--r--net/caif/caif_socket.c12
-rw-r--r--net/can/raw.c6
-rw-r--r--net/core/datagram.c16
-rw-r--r--net/core/sock.c15
-rw-r--r--net/core/stream.c4
-rw-r--r--net/dccp/proto.c12
-rw-r--r--net/decnet/af_decnet.c8
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/tcp.c34
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/af_inet6.c5
-rw-r--r--net/iucv/af_iucv.c23
-rw-r--r--net/kcm/kcmsock.c6
-rw-r--r--net/l2tp/l2tp_ip.c5
-rw-r--r--net/l2tp/l2tp_ip6.c5
-rw-r--r--net/l2tp/l2tp_ppp.c5
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/netlink/af_netlink.c5
-rw-r--r--net/netrom/af_netrom.c9
-rw-r--r--net/nfc/llcp_sock.c21
-rw-r--r--net/packet/af_packet.c14
-rw-r--r--net/phonet/socket.c15
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/rds/af_rds.c21
-rw-r--r--net/rds/tcp.c7
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rose/af_rose.c5
-rw-r--r--net/rxrpc/af_rxrpc.c4
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/socket.c20
-rw-r--r--net/smc/af_smc.c37
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/smc/smc_tx.c4
-rw-r--r--net/socket.c35
-rw-r--r--net/sunrpc/cache.c4
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/rpc_pipe.c6
-rw-r--r--net/sunrpc/svcsock.c13
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/tipc/socket.c27
-rw-r--r--net/unix/af_unix.c50
-rw-r--r--net/vmw_vsock/af_vsock.c34
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--security/apparmor/apparmorfs.c2
-rw-r--r--security/tomoyo/audit.c6
-rw-r--r--security/tomoyo/common.c14
-rw-r--r--security/tomoyo/network.c5
-rw-r--r--security/tomoyo/securityfs_if.c4
-rw-r--r--sound/core/compress_offload.c10
-rw-r--r--sound/core/control.c2
-rw-r--r--sound/core/info.c4
-rw-r--r--sound/core/init.c2
-rw-r--r--sound/core/oss/pcm_oss.c4
-rw-r--r--sound/core/pcm_native.c14
-rw-r--r--sound/core/rawmidi.c4
-rw-r--r--sound/core/seq/oss/seq_oss_rw.c4
-rw-r--r--sound/core/seq/seq_clientmgr.c4
-rw-r--r--sound/core/timer.c4
-rw-r--r--sound/firewire/bebob/bebob_hwdep.c2
-rw-r--r--sound/firewire/dice/dice-hwdep.c2
-rw-r--r--sound/firewire/digi00x/digi00x-hwdep.c2
-rw-r--r--sound/firewire/fireface/ff-hwdep.c2
-rw-r--r--sound/firewire/fireworks/fireworks_hwdep.c4
-rw-r--r--sound/firewire/motu/motu-hwdep.c4
-rw-r--r--sound/firewire/oxfw/oxfw-hwdep.c2
-rw-r--r--sound/firewire/tascam/tascam-hwdep.c2
-rw-r--r--sound/oss/dmasound/dmasound_core.c2
-rw-r--r--sound/usb/mixer_quirks.c2
-rw-r--r--sound/usb/usx2y/us122l.c4
-rw-r--r--sound/usb/usx2y/usX2Yhwdep.c4
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/arm/arch_timer.c138
-rw-r--r--virt/kvm/arm/arm.c153
-rw-r--r--virt/kvm/arm/hyp/vgic-v2-sr.c1
-rw-r--r--virt/kvm/arm/mmu.c64
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c4
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c115
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c29
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c29
-rw-r--r--virt/kvm/arm/vgic/vgic.c41
-rw-r--r--virt/kvm/arm/vgic/vgic.h8
-rw-r--r--virt/kvm/eventfd.c8
-rw-r--r--virt/kvm/kvm_main.c62
495 files changed, 8390 insertions, 2821 deletions
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX
index 69fe1a8b7ad1..3da73aabff5a 100644
--- a/Documentation/virtual/kvm/00-INDEX
+++ b/Documentation/virtual/kvm/00-INDEX
@@ -26,3 +26,6 @@ s390-diag.txt
- Diagnose hypercall description (for IBM S/390)
timekeeping.txt
- timekeeping virtualization for x86-based architectures.
+amd-memory-encryption.txt
+ - notes on AMD Secure Encrypted Virtualization feature and SEV firmware
+ command description
diff --git a/Documentation/virtual/kvm/amd-memory-encryption.rst b/Documentation/virtual/kvm/amd-memory-encryption.rst
new file mode 100644
index 000000000000..71d6d257074f
--- /dev/null
+++ b/Documentation/virtual/kvm/amd-memory-encryption.rst
@@ -0,0 +1,247 @@
+======================================
+Secure Encrypted Virtualization (SEV)
+======================================
+
+Overview
+========
+
+Secure Encrypted Virtualization (SEV) is a feature found on AMD processors.
+
+SEV is an extension to the AMD-V architecture which supports running
+virtual machines (VMs) under the control of a hypervisor. When enabled,
+the memory contents of a VM will be transparently encrypted with a key
+unique to that VM.
+
+The hypervisor can determine the SEV support through the CPUID
+instruction. The CPUID function 0x8000001f reports information related
+to SEV::
+
+ 0x8000001f[eax]:
+ Bit[1] indicates support for SEV
+ ...
+ [ecx]:
+ Bits[31:0] Number of encrypted guests supported simultaneously
+
+If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
+(MSR_K7_HWCR) can be used to determine if it can be enabled::
+
+ 0xc001_0010:
+ Bit[23] 1 = memory encryption can be enabled
+ 0 = memory encryption can not be enabled
+
+ 0xc001_0015:
+ Bit[0] 1 = memory encryption can be enabled
+ 0 = memory encryption can not be enabled
+
+When SEV support is available, it can be enabled in a specific VM by
+setting the SEV bit before executing VMRUN.::
+
+ VMCB[0x90]:
+ Bit[1] 1 = SEV is enabled
+ 0 = SEV is disabled
+
+SEV hardware uses ASIDs to associate a memory encryption key with a VM.
+Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
+defined in the CPUID 0x8000001f[ecx] field.
+
+SEV Key Management
+==================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+KVM implements the following commands to support common lifecycle events of SEV
+guests, such as launching, running, snapshotting, migrating and decommissioning.
+
+1. KVM_SEV_INIT
+---------------
+
+The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
+context. In a typical workflow, this command should be the first command issued.
+
+Returns: 0 on success, -negative on error
+
+2. KVM_SEV_LAUNCH_START
+-----------------------
+
+The KVM_SEV_LAUNCH_START command is used for creating the memory encryption
+context. To create the encryption context, user must provide a guest policy,
+the owner's public Diffie-Hellman (PDH) key and session information.
+
+Parameters: struct kvm_sev_launch_start (in/out)
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_launch_start {
+ __u32 handle; /* if zero then firmware creates a new handle */
+ __u32 policy; /* guest's policy */
+
+ __u64 dh_uaddr; /* userspace address pointing to the guest owner's PDH key */
+ __u32 dh_len;
+
+ __u64 session_addr; /* userspace address which points to the guest session information */
+ __u32 session_len;
+ };
+
+On success, the 'handle' field contains a new handle and on error, a negative value.
+
+For more details, see SEV spec Section 6.2.
+
+3. KVM_SEV_LAUNCH_UPDATE_DATA
+-----------------------------
+
+The KVM_SEV_LAUNCH_UPDATE_DATA is used for encrypting a memory region. It also
+calculates a measurement of the memory contents. The measurement is a signature
+of the memory contents that can be sent to the guest owner as an attestation
+that the memory was encrypted correctly by the firmware.
+
+Parameters (in): struct kvm_sev_launch_update_data
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_launch_update {
+ __u64 uaddr; /* userspace address to be encrypted (must be 16-byte aligned) */
+ __u32 len; /* length of the data to be encrypted (must be 16-byte aligned) */
+ };
+
+For more details, see SEV spec Section 6.3.
+
+4. KVM_SEV_LAUNCH_MEASURE
+-------------------------
+
+The KVM_SEV_LAUNCH_MEASURE command is used to retrieve the measurement of the
+data encrypted by the KVM_SEV_LAUNCH_UPDATE_DATA command. The guest owner may
+wait to provide the guest with confidential information until it can verify the
+measurement. Since the guest owner knows the initial contents of the guest at
+boot, the measurement can be verified by comparing it to what the guest owner
+expects.
+
+Parameters (in): struct kvm_sev_launch_measure
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_launch_measure {
+ __u64 uaddr; /* where to copy the measurement */
+ __u32 len; /* length of measurement blob */
+ };
+
+For more details on the measurement verification flow, see SEV spec Section 6.4.
+
+5. KVM_SEV_LAUNCH_FINISH
+------------------------
+
+After completion of the launch flow, the KVM_SEV_LAUNCH_FINISH command can be
+issued to make the guest ready for the execution.
+
+Returns: 0 on success, -negative on error
+
+6. KVM_SEV_GUEST_STATUS
+-----------------------
+
+The KVM_SEV_GUEST_STATUS command is used to retrieve status information about a
+SEV-enabled guest.
+
+Parameters (out): struct kvm_sev_guest_status
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_guest_status {
+ __u32 handle; /* guest handle */
+ __u32 policy; /* guest policy */
+ __u8 state; /* guest state (see enum below) */
+ };
+
+SEV guest state:
+
+::
+
+ enum {
+ SEV_STATE_INVALID = 0;
+ SEV_STATE_LAUNCHING, /* guest is currently being launched */
+ SEV_STATE_SECRET, /* guest is being launched and ready to accept the ciphertext data */
+ SEV_STATE_RUNNING, /* guest is fully launched and running */
+ SEV_STATE_RECEIVING, /* guest is being migrated in from another SEV machine */
+ SEV_STATE_SENDING /* guest is getting migrated out to another SEV machine */
+ };
+
+7. KVM_SEV_DBG_DECRYPT
+----------------------
+
+The KVM_SEV_DEBUG_DECRYPT command can be used by the hypervisor to request the
+firmware to decrypt the data at the given memory region.
+
+Parameters (in): struct kvm_sev_dbg
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_dbg {
+ __u64 src_uaddr; /* userspace address of data to decrypt */
+ __u64 dst_uaddr; /* userspace address of destination */
+ __u32 len; /* length of memory region to decrypt */
+ };
+
+The command returns an error if the guest policy does not allow debugging.
+
+8. KVM_SEV_DBG_ENCRYPT
+----------------------
+
+The KVM_SEV_DEBUG_ENCRYPT command can be used by the hypervisor to request the
+firmware to encrypt the data at the given memory region.
+
+Parameters (in): struct kvm_sev_dbg
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_dbg {
+ __u64 src_uaddr; /* userspace address of data to encrypt */
+ __u64 dst_uaddr; /* userspace address of destination */
+ __u32 len; /* length of memory region to encrypt */
+ };
+
+The command returns an error if the guest policy does not allow debugging.
+
+9. KVM_SEV_LAUNCH_SECRET
+------------------------
+
+The KVM_SEV_LAUNCH_SECRET command can be used by the hypervisor to inject secret
+data after the measurement has been validated by the guest owner.
+
+Parameters (in): struct kvm_sev_launch_secret
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr; /* userspace address containing the packet header */
+ __u32 hdr_len;
+
+ __u64 guest_uaddr; /* the guest memory region where the secret should be injected */
+ __u32 guest_len;
+
+ __u64 trans_uaddr; /* the hypervisor memory region which contains the secret */
+ __u32 trans_len;
+ };
+
+References
+==========
+
+.. [white-paper] http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
+.. [api-spec] http://support.amd.com/TechDocs/55766_SEV-KM%20API_Specification.pdf
+.. [amd-apm] http://support.amd.com/TechDocs/24593.pdf (section 15.34)
+.. [kvm-forum] http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index fc3ae951bc07..792fa8717d13 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1841,6 +1841,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_DBSR | 32
PPC | KVM_REG_PPC_TIDR | 64
PPC | KVM_REG_PPC_PSSCR | 64
+ PPC | KVM_REG_PPC_DEC_EXPIRY | 64
PPC | KVM_REG_PPC_TM_GPR0 | 64
...
PPC | KVM_REG_PPC_TM_GPR31 | 64
@@ -3403,7 +3404,7 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
or if no page table is present for the addresses (e.g. when using
hugepages).
-4.108 KVM_PPC_GET_CPU_CHAR
+4.109 KVM_PPC_GET_CPU_CHAR
Capability: KVM_CAP_PPC_GET_CPU_CHAR
Architectures: powerpc
@@ -3449,6 +3450,57 @@ array bounds check and the array access.
These fields use the same bit definitions as the new
H_GET_CPU_CHARACTERISTICS hypercall.
+4.110 KVM_MEMORY_ENCRYPT_OP
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: an opaque platform specific structure (in/out)
+Returns: 0 on success; -1 on error
+
+If the platform supports creating encrypted VMs then this ioctl can be used
+for issuing platform-specific memory encryption commands to manage those
+encrypted VMs.
+
+Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+(SEV) commands on AMD Processors. The SEV commands are defined in
+Documentation/virtual/kvm/amd-memory-encryption.txt.
+
+4.111 KVM_MEMORY_ENCRYPT_REG_REGION
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_enc_region (in)
+Returns: 0 on success; -1 on error
+
+This ioctl can be used to register a guest memory region which may
+contain encrypted data (e.g. guest RAM, SMRAM etc).
+
+It is used in the SEV-enabled guest. When encryption is enabled, a guest
+memory region may contain encrypted data. The SEV memory encryption
+engine uses a tweak such that two identical plaintext pages, each at
+different locations will have differing ciphertexts. So swapping or
+moving ciphertext of those pages will not result in plaintext being
+swapped. So relocating (or migrating) physical backing pages for the SEV
+guest will require some additional steps.
+
+Note: The current SEV key management spec does not provide commands to
+swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
+memory region registered with the ioctl.
+
+4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_enc_region (in)
+Returns: 0 on success; -1 on error
+
+This ioctl can be used to unregister the guest memory region registered
+with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
+
+
5. The kvm_run structure
------------------------
diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
deleted file mode 100644
index 38bca2835278..000000000000
--- a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
+++ /dev/null
@@ -1,187 +0,0 @@
-KVM/ARM VGIC Forwarded Physical Interrupts
-==========================================
-
-The KVM/ARM code implements software support for the ARM Generic
-Interrupt Controller's (GIC's) hardware support for virtualization by
-allowing software to inject virtual interrupts to a VM, which the guest
-OS sees as regular interrupts. The code is famously known as the VGIC.
-
-Some of these virtual interrupts, however, correspond to physical
-interrupts from real physical devices. One example could be the
-architected timer, which itself supports virtualization, and therefore
-lets a guest OS program the hardware device directly to raise an
-interrupt at some point in time. When such an interrupt is raised, the
-host OS initially handles the interrupt and must somehow signal this
-event as a virtual interrupt to the guest. Another example could be a
-passthrough device, where the physical interrupts are initially handled
-by the host, but the device driver for the device lives in the guest OS
-and KVM must therefore somehow inject a virtual interrupt on behalf of
-the physical one to the guest OS.
-
-These virtual interrupts corresponding to a physical interrupt on the
-host are called forwarded physical interrupts, but are also sometimes
-referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
-
-Forwarded physical interrupts are handled slightly differently compared
-to virtual interrupts generated purely by a software emulated device.
-
-
-The HW bit
-----------
-Virtual interrupts are signalled to the guest by programming the List
-Registers (LRs) on the GIC before running a VCPU. The LR is programmed
-with the virtual IRQ number and the state of the interrupt (Pending,
-Active, or Pending+Active). When the guest ACKs and EOIs a virtual
-interrupt, the LR state moves from Pending to Active, and finally to
-inactive.
-
-The LRs include an extra bit, called the HW bit. When this bit is set,
-KVM must also program an additional field in the LR, the physical IRQ
-number, to link the virtual with the physical IRQ.
-
-When the HW bit is set, KVM must EITHER set the Pending OR the Active
-bit, never both at the same time.
-
-Setting the HW bit causes the hardware to deactivate the physical
-interrupt on the physical distributor when the guest deactivates the
-corresponding virtual interrupt.
-
-
-Forwarded Physical Interrupts Life Cycle
-----------------------------------------
-
-The state of forwarded physical interrupts is managed in the following way:
-
- - The physical interrupt is acked by the host, and becomes active on
- the physical distributor (*).
- - KVM sets the LR.Pending bit, because this is the only way the GICV
- interface is going to present it to the guest.
- - LR.Pending will stay set as long as the guest has not acked the interrupt.
- - LR.Pending transitions to LR.Active on the guest read of the IAR, as
- expected.
- - On guest EOI, the *physical distributor* active bit gets cleared,
- but the LR.Active is left untouched (set).
- - KVM clears the LR on VM exits when the physical distributor
- active state has been cleared.
-
-(*): The host handling is slightly more complicated. For some forwarded
-interrupts (shared), KVM directly sets the active state on the physical
-distributor before entering the guest, because the interrupt is never actually
-handled on the host (see details on the timer as an example below). For other
-forwarded interrupts (non-shared) the host does not deactivate the interrupt
-when the host ISR completes, but leaves the interrupt active until the guest
-deactivates it. Leaving the interrupt active is allowed, because Linux
-configures the physical GIC with EOIMode=1, which causes EOI operations to
-perform a priority drop allowing the GIC to receive other interrupts of the
-default priority.
-
-
-Forwarded Edge and Level Triggered PPIs and SPIs
-------------------------------------------------
-Forwarded physical interrupts injected should always be active on the
-physical distributor when injected to a guest.
-
-Level-triggered interrupts will keep the interrupt line to the GIC
-asserted, typically until the guest programs the device to deassert the
-line. This means that the interrupt will remain pending on the physical
-distributor until the guest has reprogrammed the device. Since we
-always run the VM with interrupts enabled on the CPU, a pending
-interrupt will exit the guest as soon as we switch into the guest,
-preventing the guest from ever making progress as the process repeats
-over and over. Therefore, the active state on the physical distributor
-must be set when entering the guest, preventing the GIC from forwarding
-the pending interrupt to the CPU. As soon as the guest deactivates the
-interrupt, the physical line is sampled by the hardware again and the host
-takes a new interrupt if and only if the physical line is still asserted.
-
-Edge-triggered interrupts do not exhibit the same problem with
-preventing guest execution that level-triggered interrupts do. One
-option is to not use HW bit at all, and inject edge-triggered interrupts
-from a physical device as pure virtual interrupts. But that would
-potentially slow down handling of the interrupt in the guest, because a
-physical interrupt occurring in the middle of the guest ISR would
-preempt the guest for the host to handle the interrupt. Additionally,
-if you configure the system to handle interrupts on a separate physical
-core from that running your VCPU, you still have to interrupt the VCPU
-to queue the pending state onto the LR, even though the guest won't use
-this information until the guest ISR completes. Therefore, the HW
-bit should always be set for forwarded edge-triggered interrupts. With
-the HW bit set, the virtual interrupt is injected and additional
-physical interrupts occurring before the guest deactivates the interrupt
-simply mark the state on the physical distributor as Pending+Active. As
-soon as the guest deactivates the interrupt, the host takes another
-interrupt if and only if there was a physical interrupt between injecting
-the forwarded interrupt to the guest and the guest deactivating the
-interrupt.
-
-Consequently, whenever we schedule a VCPU with one or more LRs with the
-HW bit set, the interrupt must also be active on the physical
-distributor.
-
-
-Forwarded LPIs
---------------
-LPIs, introduced in GICv3, are always edge-triggered and do not have an
-active state. They become pending when a device signal them, and as
-soon as they are acked by the CPU, they are inactive again.
-
-It therefore doesn't make sense, and is not supported, to set the HW bit
-for physical LPIs that are forwarded to a VM as virtual interrupts,
-typically virtual SPIs.
-
-For LPIs, there is no other choice than to preempt the VCPU thread if
-necessary, and queue the pending state onto the LR.
-
-
-Putting It Together: The Architected Timer
-------------------------------------------
-The architected timer is a device that signals interrupts with level
-triggered semantics. The timer hardware is directly accessed by VCPUs
-which program the timer to fire at some point in time. Each VCPU on a
-system programs the timer to fire at different times, and therefore the
-hardware is multiplexed between multiple VCPUs. This is implemented by
-context-switching the timer state along with each VCPU thread.
-
-However, this means that a scenario like the following is entirely
-possible, and in fact, typical:
-
-1. KVM runs the VCPU
-2. The guest programs the time to fire in T+100
-3. The guest is idle and calls WFI (wait-for-interrupts)
-4. The hardware traps to the host
-5. KVM stores the timer state to memory and disables the hardware timer
-6. KVM schedules a soft timer to fire in T+(100 - time since step 2)
-7. KVM puts the VCPU thread to sleep (on a waitqueue)
-8. The soft timer fires, waking up the VCPU thread
-9. KVM reprograms the timer hardware with the VCPU's values
-10. KVM marks the timer interrupt as active on the physical distributor
-11. KVM injects a forwarded physical interrupt to the guest
-12. KVM runs the VCPU
-
-Notice that KVM injects a forwarded physical interrupt in step 11 without
-the corresponding interrupt having actually fired on the host. That is
-exactly why we mark the timer interrupt as active in step 10, because
-the active state on the physical distributor is part of the state
-belonging to the timer hardware, which is context-switched along with
-the VCPU thread.
-
-If the guest does not idle because it is busy, the flow looks like this
-instead:
-
-1. KVM runs the VCPU
-2. The guest programs the time to fire in T+100
-4. At T+100 the timer fires and a physical IRQ causes the VM to exit
- (note that this initially only traps to EL2 and does not run the host ISR
- until KVM has returned to the host).
-5. With interrupts still disabled on the CPU coming back from the guest, KVM
- stores the virtual timer state to memory and disables the virtual hw timer.
-6. KVM looks at the timer state (in memory) and injects a forwarded physical
- interrupt because it concludes the timer has expired.
-7. KVM marks the timer interrupt as active on the physical distributor
-7. KVM enables the timer, enables interrupts, and runs the VCPU
-
-Notice that again the forwarded physical interrupt is injected to the
-guest without having actually been handled on the host. In this case it
-is because the physical interrupt is never actually seen by the host because the
-timer is disabled upon guest return, and the virtual forwarded interrupt is
-injected on the KVM guest entry path.
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 3c65feb83010..dcab6dc11e3b 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -54,6 +54,10 @@ KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
|| || before enabling paravirtualized
|| || spinlock support.
------------------------------------------------------------------------------
+KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
+ || || before enabling paravirtualized
+ || || tlb flush.
+------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
diff --git a/MAINTAINERS b/MAINTAINERS
index e6c26cb47d02..3bdc260e36b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7748,7 +7748,9 @@ F: arch/powerpc/kernel/kvm*
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <[email protected]>
-M: Cornelia Huck <[email protected]>
+M: Janosch Frank <[email protected]>
+R: David Hildenbrand <[email protected]>
+R: Cornelia Huck <[email protected]>
W: http://www.ibm.com/developerworks/linux/linux390/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
@@ -12026,6 +12028,7 @@ F: drivers/pci/hotplug/s390_pci_hpc.c
S390 VFIO-CCW DRIVER
M: Cornelia Huck <[email protected]>
M: Dong Jia Shi <[email protected]>
+M: Halil Pasic <[email protected]>
S: Supported
diff --git a/Makefile b/Makefile
index cd9145c0878d..79ad2bfa24b6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
-PATCHLEVEL = 15
+PATCHLEVEL = 16
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Fearless Coyote
# *DOCUMENTATION*
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 3d22eb87f919..9003bd19cb70 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -131,7 +131,7 @@ static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
{
unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
- return cpsr_mode > USR_MODE;;
+ return cpsr_mode > USR_MODE;
}
static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ef54013b5b9f..248b930563e5 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -48,6 +48,8 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index ab20ffa8b9e7..1ab8329e9ff7 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -21,7 +21,6 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/cp15.h>
-#include <asm/kvm_mmu.h>
#include <asm/vfp.h>
#define __hyp_text __section(.hyp.text) notrace
@@ -69,6 +68,8 @@
#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
+#define BPIALLIS __ACCESS_CP15(c7, 0, c1, 6)
+#define ICIMVAU __ACCESS_CP15(c7, 0, c5, 1)
#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a2d176a308bd..de1b919404e4 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,6 +37,8 @@
#include <linux/highmem.h>
#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/kvm_hyp.h>
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
@@ -83,6 +85,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
return pmd;
}
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+ pte_val(pte) &= ~L_PTE_XN;
+ return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_SECT_XN;
+ return pmd;
+}
+
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
@@ -93,6 +107,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
}
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+ return !(pte_val(*pte) & L_PTE_XN);
+}
+
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
{
pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
@@ -103,6 +122,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
}
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+ return !(pmd_val(*pmd) & PMD_SECT_XN);
+}
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
@@ -126,10 +150,36 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
}
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
- kvm_pfn_t pfn,
- unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+ /*
+ * Clean the dcache to the Point of Coherency.
+ *
+ * We need to do this through a kernel mapping (using the
+ * user-space mapping has proved to be the wrong
+ * solution). For that, we need to kmap one page at a time,
+ * and iterate over the range.
+ */
+
+ VM_BUG_ON(size & ~PAGE_MASK);
+
+ while (size) {
+ void *va = kmap_atomic_pfn(pfn);
+
+ kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+ size -= PAGE_SIZE;
+ pfn++;
+
+ kunmap_atomic(va);
+ }
+}
+
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+ unsigned long size)
{
+ u32 iclsz;
+
/*
* If we are going to insert an instruction page and the icache is
* either VIPT or PIPT, there is a potential problem where the host
@@ -141,23 +191,40 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
*
* VIVT caches are tagged using both the ASID and the VMID and doesn't
* need any kind of flushing (DDI 0406C.b - Page B3-1392).
- *
- * We need to do this through a kernel mapping (using the
- * user-space mapping has proved to be the wrong
- * solution). For that, we need to kmap one page at a time,
- * and iterate over the range.
*/
VM_BUG_ON(size & ~PAGE_MASK);
+ if (icache_is_vivt_asid_tagged())
+ return;
+
+ if (!icache_is_pipt()) {
+ /* any kind of VIPT cache */
+ __flush_icache_all();
+ return;
+ }
+
+ /*
+ * CTR IminLine contains Log2 of the number of words in the
+ * cache line, so we can get the number of words as
+ * 2 << (IminLine - 1). To get the number of bytes, we
+ * multiply by 4 (the number of bytes in a 32-bit word), and
+ * get 4 << (IminLine).
+ */
+ iclsz = 4 << (read_cpuid(CPUID_CACHETYPE) & 0xf);
+
while (size) {
void *va = kmap_atomic_pfn(pfn);
+ void *end = va + PAGE_SIZE;
+ void *addr = va;
- kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+ do {
+ write_sysreg(addr, ICIMVAU);
+ addr += iclsz;
+ } while (addr < end);
- if (icache_is_pipt())
- __cpuc_coherent_user_range((unsigned long)va,
- (unsigned long)va + PAGE_SIZE);
+ dsb(ishst);
+ isb();
size -= PAGE_SIZE;
pfn++;
@@ -165,9 +232,11 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
kunmap_atomic(va);
}
- if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
- /* any kind of VIPT cache */
- __flush_icache_all();
+ /* Check if we need to invalidate the BTB */
+ if ((read_cpuid_ext(CPUID_EXT_MMFR1) >> 28) != 4) {
+ write_sysreg(0, BPIALLIS);
+ dsb(ishst);
+ isb();
}
}
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 150ece66ddf3..a757401129f9 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -102,8 +102,8 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
-#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
+#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN)
+#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 330c9ce34ba5..ae45ae96aac2 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -18,6 +18,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
__asm__(".arch_extension virt");
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 6d810af2d9fd..c0edd450e104 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -19,6 +19,7 @@
*/
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
/**
* Flush per-VMID TLBs
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 1241fb211293..3c78835bba94 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -436,6 +436,27 @@ alternative_endif
.endm
/*
+ * Macro to perform an instruction cache maintenance for the interval
+ * [start, end)
+ *
+ * start, end: virtual addresses describing the region
+ * label: A label to branch to on user fault.
+ * Corrupts: tmp1, tmp2
+ */
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+ icache_line_size \tmp1, \tmp2
+ sub \tmp2, \tmp1, #1
+ bic \tmp2, \start, \tmp2
+9997:
+USER(\label, ic ivau, \tmp2) // invalidate I line PoU
+ add \tmp2, \tmp2, \tmp1
+ cmp \tmp2, \end
+ b.lo 9997b
+ dsb ish
+ isb
+ .endm
+
+/*
* reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
*/
.macro reset_pmuserenr_el0, tmpreg
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 955130762a3c..bef9f418f089 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -52,6 +52,12 @@
* - start - virtual start address
* - end - virtual end address
*
+ * invalidate_icache_range(start, end)
+ *
+ * Invalidate the I-cache in the region described by start, end.
+ * - start - virtual start address
+ * - end - virtual end address
+ *
* __flush_cache_user_range(start, end)
*
* Ensure coherency between the I-cache and the D-cache in the
@@ -66,6 +72,7 @@
* - size - region size
*/
extern void flush_icache_range(unsigned long start, unsigned long end);
+extern int invalidate_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
extern void __inval_dcache_area(void *addr, size_t len);
extern void __clean_dcache_area_poc(void *addr, size_t len);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a73f63aca68e..596f8e414a4c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -48,6 +48,8 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 08d3bb66c8b7..f26f9cd70c72 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -20,7 +20,6 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
#define __hyp_text __section(.hyp.text) notrace
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 72e279dbae5f..9679067a1574 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -173,6 +173,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
return pmd;
}
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+ pte_val(pte) &= ~PTE_S2_XN;
+ return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_S2_XN;
+ return pmd;
+}
+
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
pteval_t old_pteval, pteval;
@@ -191,6 +203,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+ return !(pte_val(*pte) & PTE_S2_XN);
+}
+
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
{
kvm_set_s2pte_readonly((pte_t *)pmd);
@@ -201,6 +218,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
return kvm_s2pte_readonly((pte_t *)pmd);
}
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+ return !(pmd_val(*pmd) & PMD_S2_XN);
+}
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
@@ -230,21 +252,25 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
- kvm_pfn_t pfn,
- unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
void *va = page_address(pfn_to_page(pfn));
kvm_flush_dcache_to_poc(va, size);
+}
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+ unsigned long size)
+{
if (icache_is_aliasing()) {
/* any kind of VIPT cache */
__flush_icache_all();
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
- flush_icache_range((unsigned long)va,
- (unsigned long)va + size);
+ void *va = page_address(pfn_to_page(pfn));
+
+ invalidate_icache_range((unsigned long)va,
+ (unsigned long)va + size);
}
}
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index f42836da8723..cdfe3e657a9e 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -187,9 +187,11 @@
*/
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
+#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 2db84df5eb42..108ecad7acc5 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -67,8 +67,8 @@
#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5c7f657dd207..d7e3299a7734 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -361,10 +361,16 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
trace_kvm_set_guest_debug(vcpu, dbg->control);
- if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
- return -EINVAL;
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
+ ret = -EINVAL;
+ goto out;
+ }
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
@@ -378,7 +384,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
/* If not enabled clear all flags */
vcpu->guest_debug = 0;
}
- return 0;
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index f4363d40e2cd..dabb5cc7b087 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -21,6 +21,7 @@
#include <asm/debug-monitors.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#define read_debug(r,n) read_sysreg(r##n##_el1)
#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index cac6a0500162..116252a8d3a5 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -24,6 +24,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 73464a96c365..131c7772703c 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -16,6 +16,7 @@
*/
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#include <asm/tlbflush.h>
static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 91464e7f77cc..758bde7e2fa6 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -60,16 +60,7 @@ user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
b.lo 1b
dsb ish
- icache_line_size x2, x3
- sub x3, x2, #1
- bic x4, x0, x3
-1:
-USER(9f, ic ivau, x4 ) // invalidate I line PoU
- add x4, x4, x2
- cmp x4, x1
- b.lo 1b
- dsb ish
- isb
+ invalidate_icache_by_line x0, x1, x2, x3, 9f
mov x0, #0
1:
uaccess_ttbr0_disable x1, x2
@@ -81,6 +72,27 @@ ENDPROC(flush_icache_range)
ENDPROC(__flush_cache_user_range)
/*
+ * invalidate_icache_range(start,end)
+ *
+ * Ensure that the I cache is invalid within specified region.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(invalidate_icache_range)
+ uaccess_ttbr0_enable x2, x3, x4
+
+ invalidate_icache_by_line x0, x1, x2, x3, 2f
+ mov x0, xzr
+1:
+ uaccess_ttbr0_disable x1, x2
+ ret
+2:
+ mov x0, #-EFAULT
+ b 1b
+ENDPROC(invalidate_icache_range)
+
+/*
* __flush_dcache_area(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
diff --git a/arch/blackfin/include/uapi/asm/poll.h b/arch/blackfin/include/uapi/asm/poll.h
index 3b162f2d2970..cd2f1a78aba5 100644
--- a/arch/blackfin/include/uapi/asm/poll.h
+++ b/arch/blackfin/include/uapi/asm/poll.h
@@ -9,25 +9,8 @@
#ifndef _UAPI__BFIN_POLL_H
#define _UAPI__BFIN_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 256
#include <asm-generic/poll.h>
diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c
index a2986c60aaac..cd0e05d89d42 100644
--- a/arch/cris/arch-v10/drivers/gpio.c
+++ b/arch/cris/arch-v10/drivers/gpio.c
@@ -173,7 +173,7 @@ static __poll_t gpio_poll(struct file *file, poll_table *wait)
if ((data & priv->highalarm) ||
(~data & priv->lowalarm)) {
- mask = POLLIN|POLLRDNORM;
+ mask = EPOLLIN|EPOLLRDNORM;
}
out:
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index 177843c64071..ed1a568a7217 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -666,16 +666,16 @@ static __poll_t sync_serial_poll(struct file *file, poll_table *wait)
poll_wait(file, &port->in_wait_q, wait);
/* Some room to write */
if (port->out_count < OUT_BUFFER_SIZE)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
/* At least an inbufchunk of data */
if (sync_data_avail(port) >= port->inbufchunk)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
DEBUGPOLL(if (mask != prev_mask)
printk(KERN_DEBUG "sync_serial_poll: mask 0x%08X %s %s\n",
mask,
- mask & POLLOUT ? "POLLOUT" : "",
- mask & POLLIN ? "POLLIN" : "");
+ mask & EPOLLOUT ? "POLLOUT" : "",
+ mask & EPOLLIN ? "POLLIN" : "");
prev_mask = mask;
);
return mask;
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index e20e0b9a3a5c..1b0ce8a8af16 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -574,24 +574,24 @@ static __poll_t sync_serial_poll(struct file *file, poll_table *wait)
/* No active transfer, descriptors are available */
if (port->output && !port->tr_running)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
/* Descriptor and buffer space available. */
if (port->output &&
port->active_tr_descr != port->catch_tr_descr &&
port->out_buf_count < OUT_BUFFER_SIZE)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
/* At least an inbufchunk of data */
if (port->input && sync_data_avail(port) >= port->inbufchunk)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
DEBUGPOLL(
if (mask != prev_mask)
pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
mask,
- mask & POLLOUT ? "POLLOUT" : "",
- mask & POLLIN ? "POLLIN" : "");
+ mask & EPOLLOUT ? "POLLOUT" : "",
+ mask & EPOLLIN ? "POLLIN" : "");
prev_mask = mask;
);
return mask;
diff --git a/arch/frv/include/uapi/asm/poll.h b/arch/frv/include/uapi/asm/poll.h
index a44c8f0ebee7..f55b45f475ec 100644
--- a/arch/frv/include/uapi/asm/poll.h
+++ b/arch/frv/include/uapi/asm/poll.h
@@ -2,25 +2,8 @@
#ifndef _ASM_POLL_H
#define _ASM_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 256
#include <asm-generic/poll.h>
#undef POLLREMOVE
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 858602494096..8fb280e33114 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1670,7 +1670,7 @@ pfm_poll(struct file *filp, poll_table * wait)
PROTECT_CTX(ctx, flags);
if (PFM_CTXQ_EMPTY(ctx) == 0)
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
UNPROTECT_CTX(ctx, flags);
diff --git a/arch/m68k/include/uapi/asm/poll.h b/arch/m68k/include/uapi/asm/poll.h
index d8be239e8141..c3e3fcc15e1d 100644
--- a/arch/m68k/include/uapi/asm/poll.h
+++ b/arch/m68k/include/uapi/asm/poll.h
@@ -2,25 +2,8 @@
#ifndef __m68k_POLL_H
#define __m68k_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 256
#include <asm-generic/poll.h>
diff --git a/arch/mips/include/uapi/asm/poll.h b/arch/mips/include/uapi/asm/poll.h
index 3173f8917128..ad289d7b7434 100644
--- a/arch/mips/include/uapi/asm/poll.h
+++ b/arch/mips/include/uapi/asm/poll.h
@@ -2,25 +2,8 @@
#ifndef __ASM_POLL_H
#define __ASM_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)0x0100
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 0x0100
#include <asm-generic/poll.h>
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index bbb0f4770c0d..18c509c59f33 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -349,11 +349,11 @@ static __poll_t file_poll(struct file *file, poll_table *wait)
/* data available to read? */
if (rtlx_read_poll(minor, 0))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* space to write */
if (rtlx_write_poll(minor))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index b17447ce8873..76b93a9c8c9b 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 75fdeaa8c62f..2549fdd27ee1 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -446,6 +446,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r = -EINTR;
+ vcpu_load(vcpu);
+
kvm_sigset_activate(vcpu);
if (vcpu->mmio_needed) {
@@ -480,6 +482,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
out:
kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
return r;
}
@@ -900,6 +903,26 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ if (ioctl == KVM_INTERRUPT) {
+ struct kvm_mips_interrupt irq;
+
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ return -EFAULT;
+ kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
+ irq.irq);
+
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
@@ -907,56 +930,54 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
void __user *argp = (void __user *)arg;
long r;
+ vcpu_load(vcpu);
+
switch (ioctl) {
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
- return -EFAULT;
+ break;
if (ioctl == KVM_SET_ONE_REG)
- return kvm_mips_set_reg(vcpu, &reg);
+ r = kvm_mips_set_reg(vcpu, &reg);
else
- return kvm_mips_get_reg(vcpu, &reg);
+ r = kvm_mips_get_reg(vcpu, &reg);
+ break;
}
case KVM_GET_REG_LIST: {
struct kvm_reg_list __user *user_list = argp;
struct kvm_reg_list reg_list;
unsigned n;
+ r = -EFAULT;
if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
n = reg_list.n;
reg_list.n = kvm_mips_num_regs(vcpu);
if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
+ r = -E2BIG;
if (n < reg_list.n)
- return -E2BIG;
- return kvm_mips_copy_reg_indices(vcpu, user_list->reg);
- }
- case KVM_INTERRUPT:
- {
- struct kvm_mips_interrupt irq;
-
- if (copy_from_user(&irq, argp, sizeof(irq)))
- return -EFAULT;
- kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
- irq.irq);
-
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
break;
- }
+ r = kvm_mips_copy_reg_indices(vcpu, user_list->reg);
+ break;
+ }
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
+ r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
- return -EFAULT;
+ break;
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
default:
r = -ENOIOCTLCMD;
}
+
+ vcpu_put(vcpu);
return r;
}
@@ -1145,6 +1166,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
vcpu->arch.gprs[i] = regs->gpr[i];
vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */
@@ -1152,6 +1175,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.lo = regs->lo;
vcpu->arch.pc = regs->pc;
+ vcpu_put(vcpu);
return 0;
}
@@ -1159,6 +1183,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
regs->gpr[i] = vcpu->arch.gprs[i];
@@ -1166,6 +1192,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->lo = vcpu->arch.lo;
regs->pc = vcpu->arch.pc;
+ vcpu_put(vcpu);
return 0;
}
diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts
index 36ccdf05837d..56f4b5df6d65 100644
--- a/arch/nios2/boot/dts/3c120_devboard.dts
+++ b/arch/nios2/boot/dts/3c120_devboard.dts
@@ -29,7 +29,7 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu: cpu@0x0 {
+ cpu: cpu@0 {
device_type = "cpu";
compatible = "altr,nios2-1.0";
reg = <0x00000000>;
@@ -69,7 +69,7 @@
compatible = "altr,avalon", "simple-bus";
bus-frequency = <125000000>;
- pb_cpu_to_io: bridge@0x8000000 {
+ pb_cpu_to_io: bridge@8000000 {
compatible = "simple-bus";
reg = <0x08000000 0x00800000>;
#address-cells = <1>;
@@ -83,7 +83,7 @@
<0x00008000 0x08008000 0x00000020>,
<0x00400000 0x08400000 0x00000020>;
- timer_1ms: timer@0x400000 {
+ timer_1ms: timer@400000 {
compatible = "altr,timer-1.0";
reg = <0x00400000 0x00000020>;
interrupt-parent = <&cpu>;
@@ -91,7 +91,7 @@
clock-frequency = <125000000>;
};
- timer_0: timer@0x8000 {
+ timer_0: timer@8000 {
compatible = "altr,timer-1.0";
reg = < 0x00008000 0x00000020 >;
interrupt-parent = < &cpu >;
@@ -99,14 +99,14 @@
clock-frequency = < 125000000 >;
};
- jtag_uart: serial@0x4d50 {
+ jtag_uart: serial@4d50 {
compatible = "altr,juart-1.0";
reg = <0x00004d50 0x00000008>;
interrupt-parent = <&cpu>;
interrupts = <1>;
};
- tse_mac: ethernet@0x4000 {
+ tse_mac: ethernet@4000 {
compatible = "altr,tse-1.0";
reg = <0x00004000 0x00000400>,
<0x00004400 0x00000040>,
@@ -133,7 +133,7 @@
};
};
- uart: serial@0x4c80 {
+ uart: serial@4c80 {
compatible = "altr,uart-1.0";
reg = <0x00004c80 0x00000020>;
interrupt-parent = <&cpu>;
@@ -143,7 +143,7 @@
};
};
- cfi_flash_64m: flash@0x0 {
+ cfi_flash_64m: flash@0 {
compatible = "cfi-flash";
reg = <0x00000000 0x04000000>;
bank-width = <2>;
diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig
index 8b2a30b3b34f..c601c8ff1ae6 100644
--- a/arch/nios2/configs/10m50_defconfig
+++ b/arch/nios2/configs/10m50_defconfig
@@ -33,7 +33,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig
index 9451940678a0..fce33588d55c 100644
--- a/arch/nios2/configs/3c120_defconfig
+++ b/arch/nios2/configs/3c120_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 9a667007bff8..376ae803b69c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -249,10 +249,8 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
extern int kvmppc_hcall_impl_pr(unsigned long cmd);
extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
-extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu);
-extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
extern int kvm_irq_bypass;
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 735cfa35298a..998f7b7aaa9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
lphi = (l >> 16) & 0xf;
switch ((l >> 12) & 0xf) {
case 0:
- return !lphi ? 24 : -1; /* 16MB */
+ return !lphi ? 24 : 0; /* 16MB */
break;
case 1:
return 16; /* 64kB */
break;
case 3:
- return !lphi ? 34 : -1; /* 16GB */
+ return !lphi ? 34 : 0; /* 16GB */
break;
case 7:
return (16 << 8) + 12; /* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
return (24 << 8) + 12; /* 16MB in 4kB */
break;
}
- return -1;
+ return 0;
}
static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
- return 1ul << kvmppc_hpte_actual_page_shift(v, r);
+ int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+ if (shift)
+ return 1ul << shift;
+ return 0;
}
static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff;
- if (b_pgshift == 12) {
+ if (b_pgshift <= 12) {
if (a_pgshift > 12) {
sllp = (a_pgshift == 16) ? 5 : 4;
rb |= sllp << 5; /* AP field */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b577cd60..1f53b562726f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -690,6 +690,7 @@ struct kvm_vcpu_arch {
u8 mmio_vsx_offset;
u8 mmio_vsx_copy_type;
u8 mmio_vsx_tx_sx_enabled;
+ u8 mmio_vmx_copy_nums;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
@@ -709,6 +710,7 @@ struct kvm_vcpu_arch {
u8 ceded;
u8 prodded;
u8 doorbell_request;
+ u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
struct swait_queue_head *wqp;
@@ -738,8 +740,11 @@ struct kvm_vcpu_arch {
struct kvmppc_icp *icp; /* XICS presentation controller */
struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
__be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
- u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_esc_on; /* Is the escalation irq enabled ? */
union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+ u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
+ u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -800,6 +805,7 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
#define KVM_MMIO_REG_VSX 0x0080
+#define KVM_MMIO_REG_VMX 0x00c0
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9db18287b5f4..7765a800ddae 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -81,6 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian, int mmio_sign_extend);
+extern int kvmppc_handle_load128_by2x64(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian);
+extern int kvmppc_handle_store128_by2x64(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian);
extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes,
int is_default_endian);
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 24c73f5575ee..94bd1bf2c873 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1076,6 +1076,7 @@ enum {
/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
enum {
OPAL_XIVE_VP_ENABLED = 0x00000001,
+ OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
};
/* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ab5c1588b487..f1083bcf449c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -156,6 +156,12 @@
#define OP_31_XOP_LFDX 599
#define OP_31_XOP_LFDUX 631
+/* VMX Vector Load Instructions */
+#define OP_31_XOP_LVX 103
+
+/* VMX Vector Store Instructions */
+#define OP_31_XOP_STVX 231
+
#define OP_LWZ 32
#define OP_STFS 52
#define OP_STFSU 53
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 7624e22f5045..8d1a2792484f 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
#else
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 88b84ac76b53..ea5eb91b836e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -520,6 +520,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+ OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -739,6 +740,9 @@ int main(void)
DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
arch.xive_cam_word));
DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+ DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+ DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+ DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index ae2ede4de6be..446c79611d56 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -362,7 +362,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
*/
static int pci_read_irq_line(struct pci_dev *pci_dev)
{
- unsigned int virq = 0;
+ int virq;
pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
@@ -370,7 +370,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
memset(&oirq, 0xff, sizeof(oirq));
#endif
/* Try to get a mapping from the device-tree */
- if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) {
+ virq = of_irq_parse_and_map_pci(pci_dev, 0, 0);
+ if (virq <= 0) {
u8 line, pin;
/* If that fails, lets fallback to what is in the config
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index fc600a8b1e77..f915db93cd42 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -392,7 +392,7 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
{
poll_wait(file, &rtas_log_wait, wait);
if (rtas_log_size)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b12b8eb39c29..68a0e9d5b440 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_EVENTFD
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select SRCU
select KVM_VFIO
select IRQ_BYPASS_MANAGER
@@ -68,7 +69,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
- select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
+ select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 72d977e30952..234531d1bee1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -484,19 +484,33 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
regs->pc = kvmppc_get_pc(vcpu);
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = kvmppc_get_ctr(vcpu);
@@ -518,6 +532,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -525,6 +540,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
kvmppc_set_pc(vcpu, regs->pc);
kvmppc_set_cr(vcpu, regs->cr);
kvmppc_set_ctr(vcpu, regs->ctr);
@@ -545,6 +562,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -737,7 +755,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
+ vcpu_load(vcpu);
vcpu->guest_debug = dbg->control;
+ vcpu_put(vcpu);
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b73dbc9e797d..ef243fed2f2b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1269,6 +1269,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Nothing to do */
goto out;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = be64_to_cpu(hptep[1]);
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ }
+
/* Unmap */
rev = &old->rev[idx];
guest_rpte = rev->guest_rpte;
@@ -1298,7 +1303,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Reload PTE after unmap */
vpte = be64_to_cpu(hptep[0]);
-
BUG_ON(vpte & HPTE_V_VALID);
BUG_ON(!(vpte & HPTE_V_ABSENT));
@@ -1307,6 +1311,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
goto out;
rpte = be64_to_cpu(hptep[1]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ rpte = hpte_new_to_old_r(rpte);
+ }
+
pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
pteg = idx / HPTES_PER_GROUP;
@@ -1337,17 +1347,17 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
}
new_pteg = hash & new_hash_mask;
- if (vpte & HPTE_V_SECONDARY) {
- BUG_ON(~pteg != (hash & old_hash_mask));
- new_pteg = ~new_pteg;
- } else {
- BUG_ON(pteg != (hash & old_hash_mask));
- }
+ if (vpte & HPTE_V_SECONDARY)
+ new_pteg = ~hash & new_hash_mask;
new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
new_hptep = (__be64 *)(new->virt + (new_idx << 4));
replace_vpte = be64_to_cpu(new_hptep[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+ replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+ }
if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
BUG_ON(new->order >= old->order);
@@ -1363,6 +1373,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Discard the previous HPTE */
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = hpte_old_to_new_r(vpte, rpte);
+ vpte = hpte_old_to_new_v(vpte);
+ }
+
new_hptep[1] = cpu_to_be64(rpte);
new->rev[new_idx].guest_rpte = guest_rpte;
/* No need for a barrier, since new HPT isn't active */
@@ -1380,12 +1395,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
unsigned long i;
int rc;
- /*
- * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
- * that POWER9 uses, and could well hit a BUG_ON on POWER9.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- return -EIO;
for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
rc = resize_hpt_rehash_hpte(resize, i);
if (rc != 0)
@@ -1416,6 +1425,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
synchronize_srcu_expedited(&kvm->srcu);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvmppc_setup_partition_table(kvm);
+
resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 58618f644c56..0c854816e653 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
j = i + 1;
if (npages) {
set_dirty_bits(map, i, npages);
- i = j + npages;
+ j = i + npages;
}
}
return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e4f70c33fbc7..89707354c2ef 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -116,6 +116,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* If set, the threads on each CPU core have to be in the same MMU mode */
+static bool no_mixing_hpt_and_radix;
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -1003,8 +1006,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tvcpu;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return EMULATE_FAIL;
if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
return RESUME_GUEST;
if (get_op(inst) != 31)
@@ -1054,6 +1055,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+/* Called with vcpu->arch.vcore->lock held */
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -1174,7 +1176,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
swab32(vcpu->arch.emul_inst) :
vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ /* Need vcore unlocked to call kvmppc_get_last_inst */
+ spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_debug_inst(run, vcpu);
+ spin_lock(&vcpu->arch.vcore->lock);
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1189,8 +1194,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
r = EMULATE_FAIL;
- if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+ if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
+ cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* Need vcore unlocked to call kvmppc_get_last_inst */
+ spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_doorbell_instr(vcpu);
+ spin_lock(&vcpu->arch.vcore->lock);
+ }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1495,6 +1505,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ *val = get_reg_val(id, vcpu->arch.dec_expires +
+ vcpu->arch.vcore->tb_offset);
+ break;
default:
r = -EINVAL;
break;
@@ -1722,6 +1736,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ vcpu->arch.dec_expires = set_reg_val(id, *val) -
+ vcpu->arch.vcore->tb_offset;
+ break;
default:
r = -EINVAL;
break;
@@ -2376,8 +2394,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
static bool subcore_config_ok(int n_subcores, int n_threads)
{
/*
- * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
- * mode, with one thread per subcore.
+ * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+ * split-core mode, with one thread per subcore.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
return n_subcores <= 4 && n_threads == 1;
@@ -2413,8 +2431,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return false;
- /* POWER9 currently requires all threads to be in the same MMU mode */
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ /* Some POWER9 chips require all threads to be in the same MMU mode */
+ if (no_mixing_hpt_and_radix &&
kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
return false;
@@ -2677,9 +2695,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
* On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host.
+ * this is a HPT guest on a radix host machine where the
+ * CPU threads may not be in different MMU modes.
*/
- hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+ hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
+ !kvm_is_radix(vc->kvm);
if (((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
(hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2829,7 +2849,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if (!thr0_done)
kvmppc_start_thread(NULL, pvc);
- thr += pvc->num_threads;
}
/*
@@ -2932,13 +2951,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
+ preempt_enable();
+
for (sub = 0; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
post_guest_process(pvc, pvc == vc);
}
spin_lock(&vc->lock);
- preempt_enable();
out:
vc->vcore_state = VCORE_INACTIVE;
@@ -2985,7 +3005,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
{
if (!xive_enabled())
return false;
- return vcpu->arch.xive_saved_state.pipr <
+ return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
vcpu->arch.xive_saved_state.cppr;
}
#else
@@ -3174,17 +3194,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
* this thread straight away and have it join in.
*/
if (!signal_pending(current)) {
- if (vc->vcore_state == VCORE_PIGGYBACK) {
- if (spin_trylock(&vc->lock)) {
- if (vc->vcore_state == VCORE_RUNNING &&
- !VCORE_IS_EXITING(vc)) {
- kvmppc_create_dtl_entry(vcpu, vc);
- kvmppc_start_thread(vcpu, vc);
- trace_kvm_guest_enter(vcpu);
- }
- spin_unlock(&vc->lock);
- }
- } else if (vc->vcore_state == VCORE_RUNNING &&
+ if ((vc->vcore_state == VCORE_PIGGYBACK ||
+ vc->vcore_state == VCORE_RUNNING) &&
!VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
@@ -4446,6 +4457,19 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
+
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ no_mixing_hpt_and_radix = true;
+ }
+
return r;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7886b313d135..f31f357b8c5a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -413,10 +413,11 @@ FTR_SECTION_ELSE
/* On P9 we use the split_info for coordinating LPCR changes */
lwz r4, KVM_SPLIT_DO_SET(r6)
cmpwi r4, 0
- beq 63f
+ beq 1f
mr r3, r6
bl kvmhv_p9_set_lpcr
nop
+1:
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
@@ -617,13 +618,6 @@ kvmppc_hv_entry:
lbz r0, KVM_RADIX(r9)
cmpwi cr7, r0, 0
- /* Clear out SLB if hash */
- bne cr7, 2f
- li r6,0
- slbmte r6,r6
- slbia
- ptesync
-2:
/*
* POWER7/POWER8 host -> guest partition switch code.
* We don't have to lock against concurrent tlbies,
@@ -738,19 +732,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
10: cmpdi r4, 0
beq kvmppc_primary_no_guest
kvmppc_got_guest:
-
- /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
- lwz r5,VCPU_SLB_MAX(r4)
- cmpwi r5,0
- beq 9f
- mtctr r5
- addi r6,r4,VCPU_SLB
-1: ld r8,VCPU_SLB_E(r6)
- ld r9,VCPU_SLB_V(r6)
- slbmte r9,r8
- addi r6,r6,VCPU_SLB_SIZE
- bdnz 1b
-9:
/* Increment yield count if they have a VPA */
ld r3, VCPU_VPA(r4)
cmpdi r3, 0
@@ -957,7 +938,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
- std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -1018,6 +998,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
+ /* For hash guest, clear out and reload the SLB */
+ ld r6, VCPU_KVM(r4)
+ lbz r0, KVM_RADIX(r6)
+ cmpwi r0, 0
+ bne 9f
+ li r6, 0
+ slbmte r6, r6
+ slbia
+ ptesync
+
+ /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+ lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1034,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li r9, TM_QW1_OS + TM_WORD2
stwcix r11,r9,r10
li r9, 1
- stw r9, VCPU_XIVE_PUSHED(r4)
+ stb r9, VCPU_XIVE_PUSHED(r4)
eieio
+
+ /*
+ * We clear the irq_pending flag. There is a small chance of a
+ * race vs. the escalation interrupt happening on another
+ * processor setting it again, but the only consequence is to
+ * cause a spurrious wakeup on the next H_CEDE which is not an
+ * issue.
+ */
+ li r0,0
+ stb r0, VCPU_IRQ_PENDING(r4)
+
+ /*
+ * In single escalation mode, if the escalation interrupt is
+ * on, we mask it.
+ */
+ lbz r0, VCPU_XIVE_ESC_ON(r4)
+ cmpwi r0,0
+ beq 1f
+ ld r10, VCPU_XIVE_ESC_RADDR(r4)
+ li r9, XIVE_ESB_SET_PQ_01
+ ldcix r0, r10, r9
+ sync
+
+ /* We have a possible subtle race here: The escalation interrupt might
+ * have fired and be on its way to the host queue while we mask it,
+ * and if we unmask it early enough (re-cede right away), there is
+ * a theorical possibility that it fires again, thus landing in the
+ * target queue more than once which is a big no-no.
+ *
+ * Fortunately, solving this is rather easy. If the above load setting
+ * PQ to 01 returns a previous value where P is set, then we know the
+ * escalation interrupt is somewhere on its way to the host. In that
+ * case we simply don't clear the xive_esc_on flag below. It will be
+ * eventually cleared by the handler for the escalation interrupt.
+ *
+ * Then, when doing a cede, we check that flag again before re-enabling
+ * the escalation interrupt, and if set, we abort the cede.
+ */
+ andi. r0, r0, XIVE_ESB_VAL_P
+ bne- 1f
+
+ /* Now P is 0, we can clear the flag */
+ li r0, 0
+ stb r0, VCPU_XIVE_ESC_ON(r4)
+1:
no_xive:
#endif /* CONFIG_KVM_XICS */
@@ -1193,7 +1241,7 @@ hdec_soon:
addi r3, r4, VCPU_TB_RMEXIT
bl kvmhv_accumulate_time
#endif
- b guest_exit_cont
+ b guest_bypass
/******************************************************************************
* *
@@ -1423,15 +1471,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
blt deliver_guest_interrupt
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+ /* Save more register state */
+ mfdar r6
+ mfdsisr r7
+ std r6, VCPU_DAR(r9)
+ stw r7, VCPU_DSISR(r9)
+ /* don't overwrite fault_dar/fault_dsisr if HDSI */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq mc_cont
+ std r6, VCPU_FAULT_DAR(r9)
+ stw r7, VCPU_FAULT_DSISR(r9)
+
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r9, VCPU_TB_RMEXIT
+ mr r4, r9
+ bl kvmhv_accumulate_time
+#endif
#ifdef CONFIG_KVM_XICS
/* We are exiting, pull the VP from the XIVE */
- lwz r0, VCPU_XIVE_PUSHED(r9)
+ lbz r0, VCPU_XIVE_PUSHED(r9)
cmpwi cr0, r0, 0
beq 1f
li r7, TM_SPC_PULL_OS_CTX
li r6, TM_QW1_OS
mfmsr r0
- andi. r0, r0, MSR_IR /* in real mode? */
+ andi. r0, r0, MSR_DR /* in real mode? */
beq 2f
ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi cr0, r10, 0
@@ -1454,33 +1522,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Fixup some of the state for the next load */
li r10, 0
li r0, 0xff
- stw r10, VCPU_XIVE_PUSHED(r9)
+ stb r10, VCPU_XIVE_PUSHED(r9)
stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
eieio
1:
#endif /* CONFIG_KVM_XICS */
- /* Save more register state */
- mfdar r6
- mfdsisr r7
- std r6, VCPU_DAR(r9)
- stw r7, VCPU_DSISR(r9)
- /* don't overwrite fault_dar/fault_dsisr if HDSI */
- cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
- beq mc_cont
- std r6, VCPU_FAULT_DAR(r9)
- stw r7, VCPU_FAULT_DSISR(r9)
- /* See if it is a machine check */
- cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- beq machine_check_realmode
-mc_cont:
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
- addi r3, r9, VCPU_TB_RMEXIT
- mr r4, r9
- bl kvmhv_accumulate_time
-#endif
+ /* For hash guest, read the guest SLB and save it away */
+ ld r5, VCPU_KVM(r9)
+ lbz r0, KVM_RADIX(r5)
+ li r5, 0
+ cmpwi r0, 0
+ bne 3f /* for radix, save 0 entries */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+ addi r7,r9,VCPU_SLB
+1: slbmfee r8,r6
+ andis. r0,r8,SLB_ESID_V@h
+ beq 2f
+ add r8,r8,r6 /* put index in */
+ slbmfev r3,r6
+ std r8,VCPU_SLB_E(r7)
+ std r3,VCPU_SLB_V(r7)
+ addi r7,r7,VCPU_SLB_SIZE
+ addi r5,r5,1
+2: addi r6,r6,1
+ bdnz 1b
+ /* Finally clear out the SLB */
+ li r0,0
+ slbmte r0,r0
+ slbia
+ ptesync
+3: stw r5,VCPU_SLB_MAX(r9)
+guest_bypass:
mr r3, r12
/* Increment exit count, poke other threads to exit */
bl kvmhv_commence_exit
@@ -1501,31 +1578,6 @@ mc_cont:
ori r6,r6,1
mtspr SPRN_CTRLT,r6
4:
- /* Check if we are running hash or radix and store it in cr2 */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2,r0,0
-
- /* Read the guest SLB and save it away */
- li r5, 0
- bne cr2, 3f /* for radix, save 0 entries */
- lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
- mtctr r0
- li r6,0
- addi r7,r9,VCPU_SLB
-1: slbmfee r8,r6
- andis. r0,r8,SLB_ESID_V@h
- beq 2f
- add r8,r8,r6 /* put index in */
- slbmfev r3,r6
- std r8,VCPU_SLB_E(r7)
- std r3,VCPU_SLB_V(r7)
- addi r7,r7,VCPU_SLB_SIZE
- addi r5,r5,1
-2: addi r6,r6,1
- bdnz 1b
-3: stw r5,VCPU_SLB_MAX(r9)
-
/*
* Save the guest PURR/SPURR
*/
@@ -1803,7 +1855,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi cr2, r0, 0
- beq cr2, 3f
+ beq cr2, 4f
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1891,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
- b 4f
+4:
#endif /* CONFIG_PPC_RADIX_MMU */
- /* Hash: clear out SLB */
-3: li r5,0
- slbmte r5,r5
- slbia
- ptesync
-4:
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
@@ -2745,7 +2791,32 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
- b guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+ /* Abort if we still have a pending escalation */
+ lbz r5, VCPU_XIVE_ESC_ON(r9)
+ cmpwi r5, 0
+ beq 1f
+ li r0, 0
+ stb r0, VCPU_CEDED(r9)
+1: /* Enable XIVE escalation */
+ li r5, XIVE_ESB_SET_PQ_00
+ mfmsr r0
+ andi. r0, r0, MSR_DR /* in real mode? */
+ beq 1f
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ ldx r0, r10, r5
+ b 2f
+1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ ldcix r0, r10, r5
+2: sync
+ li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3: b guest_exit_cont
/* Try to handle a machine check in real mode */
machine_check_realmode:
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 901e6fe00c39..c18e845019ec 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -96,7 +96,7 @@ kvm_start_entry:
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- GET_SHADOW_VCPU(r3)
+ mr r3, r4
bl FUNC(kvmppc_copy_to_svcpu)
nop
REST_GPR(4, r1)
@@ -165,9 +165,7 @@ after_sprg3_load:
stw r12, VCPU_TRAP(r3)
/* Transfer reg values from shadow vcpu back to vcpu struct */
- /* On 64-bit, interrupts are still off at this point */
- GET_SHADOW_VCPU(r4)
bl FUNC(kvmppc_copy_from_svcpu)
nop
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7deaeeb14b93..3ae752314b34 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -121,7 +121,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
if (svcpu->in_use) {
- kvmppc_copy_from_svcpu(vcpu, svcpu);
+ kvmppc_copy_from_svcpu(vcpu);
}
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
@@ -143,9 +143,10 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
}
/* Copy data needed by real-mode code from vcpu to shadow vcpu */
-void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu)
+void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
svcpu->gpr[0] = vcpu->arch.gpr[0];
svcpu->gpr[1] = vcpu->arch.gpr[1];
svcpu->gpr[2] = vcpu->arch.gpr[2];
@@ -177,17 +178,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
if (cpu_has_feature(CPU_FTR_ARCH_207S))
vcpu->arch.entry_ic = mfspr(SPRN_IC);
svcpu->in_use = true;
+
+ svcpu_put(svcpu);
}
/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
-void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu)
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
{
- /*
- * vcpu_put would just call us again because in_use hasn't
- * been updated yet.
- */
- preempt_disable();
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
/*
* Maybe we were already preempted and synced the svcpu from
@@ -233,7 +231,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
svcpu->in_use = false;
out:
- preempt_enable();
+ svcpu_put(svcpu);
}
static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6882bc94eba8..f0f5cd4d2fe7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
{
struct kvm_vcpu *vcpu = data;
- /* We use the existing H_PROD mechanism to wake up the target */
- vcpu->arch.prodded = 1;
+ vcpu->arch.irq_pending = 1;
smp_mb();
if (vcpu->arch.ceded)
kvmppc_fast_vcpu_kick(vcpu);
+ /* Since we have the no-EOI flag, the interrupt is effectively
+ * disabled now. Clearing xive_esc_on means we won't bother
+ * doing so on the next entry.
+ *
+ * This also allows the entry code to know that if a PQ combination
+ * of 10 is observed while xive_esc_on is true, it means the queue
+ * contains an unprocessed escalation interrupt. We don't make use of
+ * that knowledge today but might (see comment in book3s_hv_rmhandler.S)
+ */
+ vcpu->arch.xive_esc_on = false;
+
return IRQ_HANDLED;
}
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO;
}
- /*
- * Future improvement: start with them disabled
- * and handle DD2 and later scheme of merged escalation
- * interrupts
- */
- name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
- vcpu->kvm->arch.lpid, xc->server_num, prio);
+ if (xc->xive->single_escalation)
+ name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+ vcpu->kvm->arch.lpid, xc->server_num);
+ else
+ name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+ vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
prio, xc->server_num);
rc = -ENOMEM;
goto error;
}
+
+ pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
IRQF_NO_THREAD, name, vcpu);
if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
goto error;
}
xc->esc_virq_names[prio] = name;
+
+ /* In single escalation mode, we grab the ESB MMIO of the
+ * interrupt and mask it. Also populate the VCPU v/raddr
+ * of the ESB page for use by asm entry/exit code. Finally
+ * set the XIVE_IRQ_NO_EOI flag which will prevent the
+ * core code from performing an EOI on the escalation
+ * interrupt, thus leaving it effectively masked after
+ * it fires once.
+ */
+ if (xc->xive->single_escalation) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+ vcpu->arch.xive_esc_raddr = xd->eoi_page;
+ vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+ xd->flags |= XIVE_IRQ_NO_EOI;
+ }
+
return 0;
error:
irq_dispose_mapping(xc->esc_virq[prio]);
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
pr_devel("Provisioning prio... %d\n", prio);
- /* Provision each VCPU and enable escalations */
+ /* Provision each VCPU and enable escalations if needed */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
- if (rc == 0)
+ if (rc == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, prio);
if (rc)
return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
/* Allocate IPI */
xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) {
+ pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO;
goto bail;
}
@@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
goto bail;
/*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ if (r) {
+ pr_err("Failed to enable VP in OPAL, err %d\n", r);
+ goto bail;
+ }
+
+ /*
* Initialize queues. Initially we set them all for no queueing
* and we enable escalation for queue 0 only which we'll use for
* our mfrr change notifications. If the VCPU is hot-plugged, we
- * do handle provisioning however.
+ * do handle provisioning however based on the existing "map"
+ * of enabled queues.
*/
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
+ /* Single escalation, no queue 7 */
+ if (i == 7 && xive->single_escalation)
+ break;
+
/* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
- if (r == 0)
+ if (r == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, i);
if (r)
goto bail;
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
- /* Enable the VP */
- r = xive_native_enable_vp(xc->vp_id);
- if (r)
- goto bail;
-
/* Route the IPI */
r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
val, server, guest_prio);
+
/*
* If the source doesn't already have an IPI, allocate
* one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
if (xive->vp_base == XIVE_INVALID_VP)
ret = -ENOMEM;
+ xive->single_escalation = xive_native_has_single_escalation();
+
if (ret) {
kfree(xive);
return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int i;
if (!xc)
continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
xc->server_num, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+ u32 i0, i1, idx;
+
+ if (!q->qpage && !xc->esc_virq[i])
+ continue;
+
+ seq_printf(m, " [q%d]: ", i);
+
+ if (q->qpage) {
+ idx = q->idx;
+ i0 = be32_to_cpup(q->qpage + idx);
+ idx = (idx + 1) & q->msk;
+ i1 = be32_to_cpup(q->qpage + idx);
+ seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+ }
+ if (xc->esc_virq[i]) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+ seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+ (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+ xc->esc_virq[i], pq, xd->eoi_page);
+ seq_printf(m, "\n");
+ }
+ }
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 6ba63f8e8a61..a08ae6fd4c51 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@ struct kvmppc_xive {
u32 q_order;
u32 q_page_order;
+ /* Flags */
+ u8 single_escalation;
};
#define KVMPPC_XIVE_Q_COUNT 8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
* is as follow.
*
* Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
*
* Similar mapping is done for CPPR values
*/
static inline u8 xive_prio_from_guest(u8 prio)
{
- if (prio == 0xff || prio < 8)
+ if (prio == 0xff || prio < 6)
return prio;
- return 7;
+ return 6;
}
static inline u8 xive_prio_to_guest(u8 prio)
{
- if (prio == 0xff || prio < 7)
- return prio;
- return 0xb;
+ return prio;
}
static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 83b485810aea..6038e2e7aee0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1431,6 +1431,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
regs->pc = vcpu->arch.pc;
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
@@ -1452,6 +1454,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -1459,6 +1462,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
vcpu->arch.pc = regs->pc;
kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
@@ -1480,6 +1485,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -1607,30 +1613,42 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
sregs->pvr = vcpu->arch.pvr;
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- int ret;
+ int ret = -EINVAL;
+ vcpu_load(vcpu);
if (vcpu->arch.pvr != sregs->pvr)
- return -EINVAL;
+ goto out;
ret = set_sregs_base(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
ret = set_sregs_arch206(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
@@ -1773,7 +1791,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
{
int r;
+ vcpu_load(vcpu);
r = kvmppc_core_vcpu_translate(vcpu, tr);
+ vcpu_put(vcpu);
return r;
}
@@ -1996,12 +2016,15 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
struct debug_reg *dbg_reg;
int n, b = 0, w = 0;
+ int ret = 0;
+
+ vcpu_load(vcpu);
if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
vcpu->arch.dbg_reg.dbcr0 = 0;
vcpu->guest_debug = 0;
kvm_guest_protect_msr(vcpu, MSR_DE, false);
- return 0;
+ goto out;
}
kvm_guest_protect_msr(vcpu, MSR_DE, true);
@@ -2033,8 +2056,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
#endif
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- return 0;
+ goto out;
+ ret = -EINVAL;
for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
uint64_t addr = dbg->arch.bp[n].addr;
uint32_t type = dbg->arch.bp[n].type;
@@ -2045,21 +2069,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (type & ~(KVMPPC_DEBUG_WATCH_READ |
KVMPPC_DEBUG_WATCH_WRITE |
KVMPPC_DEBUG_BREAKPOINT))
- return -EINVAL;
+ goto out;
if (type & KVMPPC_DEBUG_BREAKPOINT) {
/* Setting H/W breakpoint */
if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
- return -EINVAL;
+ goto out;
} else {
/* Setting H/W watchpoint */
if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
type, w++))
- return -EINVAL;
+ goto out;
}
}
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index af833531af31..a382e15135e6 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -58,6 +58,18 @@ static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_VEC)) {
+ kvmppc_core_queue_vec_unavail(vcpu);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_ALTIVEC */
+
/*
* XXX to do:
* lfiwax, lfiwzx
@@ -98,6 +110,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
vcpu->arch.mmio_sp64_extend = 0;
vcpu->arch.mmio_sign_extend = 0;
+ vcpu->arch.mmio_vmx_copy_nums = 0;
switch (get_op(inst)) {
case 31:
@@ -459,6 +472,29 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
rs, 4, 1);
break;
#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+ case OP_31_XOP_LVX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.vaddr_accessed &= ~0xFULL;
+ vcpu->arch.paddr_accessed &= ~0xFULL;
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
+ KVM_MMIO_REG_VMX|rt, 1);
+ break;
+
+ case OP_31_XOP_STVX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.vaddr_accessed &= ~0xFULL;
+ vcpu->arch.paddr_accessed &= ~0xFULL;
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
+ rs, 1);
+ break;
+#endif /* CONFIG_ALTIVEC */
+
default:
emulated = EMULATE_FAIL;
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0a7c88786ec0..403e642c78f5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -638,8 +638,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_SPAPR_RESIZE_HPT:
- /* Disable this on POWER9 until code handles new HPTE format */
- r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
+ r = !!hv_enabled;
break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -763,7 +762,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = ~(u64)0;
+ vcpu->arch.dec_expires = get_tb();
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
@@ -930,6 +929,34 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+ u32 hi, lo;
+ u32 di;
+
+#ifdef __BIG_ENDIAN
+ hi = gpr >> 32;
+ lo = gpr & 0xffffffff;
+#else
+ lo = gpr >> 32;
+ hi = gpr & 0xffffffff;
+#endif
+
+ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
+ if (di > 1)
+ return;
+
+ if (vcpu->arch.mmio_host_swabbed)
+ di = 1 - di;
+
+ VCPU_VSX_VR(vcpu, index).u[di * 2] = hi;
+ VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo;
+}
+#endif /* CONFIG_ALTIVEC */
+
#ifdef CONFIG_PPC_FPU
static inline u64 sp_to_dp(u32 fprs)
{
@@ -1033,6 +1060,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_vsr_dword_dump(vcpu, gpr);
break;
#endif
+#ifdef CONFIG_ALTIVEC
+ case KVM_MMIO_REG_VMX:
+ kvmppc_set_vmx_dword(vcpu, gpr);
+ break;
+#endif
default:
BUG();
}
@@ -1106,11 +1138,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
enum emulation_result emulated = EMULATE_DONE;
- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
return EMULATE_FAIL;
- }
while (vcpu->arch.mmio_vsx_copy_nums) {
emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@ -1252,11 +1282,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.io_gpr = rs;
- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
return EMULATE_FAIL;
- }
while (vcpu->arch.mmio_vsx_copy_nums) {
if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
@@ -1312,6 +1340,111 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+/* handle quadword load access in two halves */
+int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, int is_default_endian)
+{
+ enum emulation_result emulated;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
+ is_default_endian, 0);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ }
+
+ return emulated;
+}
+
+static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+ vector128 vrs = VCPU_VSX_VR(vcpu, rs);
+ u32 di;
+ u64 w0, w1;
+
+ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
+ if (di > 1)
+ return -1;
+
+ if (vcpu->arch.mmio_host_swabbed)
+ di = 1 - di;
+
+ w0 = vrs.u[di * 2];
+ w1 = vrs.u[di * 2 + 1];
+
+#ifdef __BIG_ENDIAN
+ *val = (w0 << 32) | w1;
+#else
+ *val = (w1 << 32) | w0;
+#endif
+ return 0;
+}
+
+/* handle quadword store in two halves */
+int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rs, int is_default_endian)
+{
+ u64 val = 0;
+ enum emulation_result emulated = EMULATE_DONE;
+
+ vcpu->arch.io_gpr = rs;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1)
+ return EMULATE_FAIL;
+
+ emulated = kvmppc_handle_store(run, vcpu, val, 8,
+ is_default_endian);
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ enum emulation_result emulated = EMULATE_FAIL;
+ int r;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ if (!vcpu->mmio_is_write) {
+ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
+ vcpu->arch.io_gpr, 1);
+ } else {
+ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
+ vcpu->arch.io_gpr, 1);
+ }
+
+ switch (emulated) {
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ pr_info("KVM: MMIO emulation failed (VMX repeat)\n");
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ r = RESUME_HOST;
+ break;
+ default:
+ r = RESUME_GUEST;
+ break;
+ }
+ return r;
+}
+#endif /* CONFIG_ALTIVEC */
+
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = 0;
@@ -1413,6 +1546,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
+ vcpu_load(vcpu);
+
if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0;
if (!vcpu->mmio_is_write)
@@ -1427,7 +1562,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
if (r == RESUME_HOST) {
vcpu->mmio_needed = 1;
- return r;
+ goto out;
+ }
+ }
+#endif
+#ifdef CONFIG_ALTIVEC
+ if (vcpu->arch.mmio_vmx_copy_nums > 0)
+ vcpu->arch.mmio_vmx_copy_nums--;
+
+ if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+ r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
+ if (r == RESUME_HOST) {
+ vcpu->mmio_needed = 1;
+ goto out;
}
}
#endif
@@ -1461,6 +1608,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
+out:
+ vcpu_put(vcpu);
return r;
}
@@ -1608,23 +1757,31 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
- switch (ioctl) {
- case KVM_INTERRUPT: {
+ if (ioctl == KVM_INTERRUPT) {
struct kvm_interrupt irq;
- r = -EFAULT;
if (copy_from_user(&irq, argp, sizeof(irq)))
- goto out;
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- goto out;
+ return -EFAULT;
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ vcpu_load(vcpu);
+ switch (ioctl) {
case KVM_ENABLE_CAP:
{
struct kvm_enable_cap cap;
@@ -1664,6 +1821,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
out:
+ vcpu_put(vcpu);
return r;
}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index e44d2b2ea97e..1c03c978eb18 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
int i;
u64 min, max, sum, sum_quad;
- seq_printf(m, "%s", "type count min max sum sum_squared\n");
-
+ seq_puts(m, "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index 1a9a756b0b2f..857580a78bbd 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -101,9 +101,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
but first mark any pending interrupts as done so
we don't get woken up unnecessarily */
- if (events & (POLLIN | POLLRDNORM)) {
+ if (events & (EPOLLIN | EPOLLRDNORM)) {
if (stat & 0xff0000)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
else {
ctx->csa.priv1.int_stat_class2_RW &=
~CLASS2_MAILBOX_INTR;
@@ -111,9 +111,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
CLASS2_ENABLE_MAILBOX_INTR;
}
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (events & (EPOLLOUT | EPOLLWRNORM)) {
if (stat & 0x00ff00)
- ret = POLLOUT | POLLWRNORM;
+ ret = EPOLLOUT | EPOLLWRNORM;
else {
ctx->csa.priv1.int_stat_class2_RW &=
~CLASS2_MAILBOX_THRESHOLD_INTR;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c1be486da899..469bdd0b748f 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -774,7 +774,7 @@ static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait)
* that poll should not sleep. Will be fixed later.
*/
mutex_lock(&ctx->state_mutex);
- mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM);
+ mask = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM);
spu_release(ctx);
return mask;
@@ -910,7 +910,7 @@ static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait)
* that poll should not sleep. Will be fixed later.
*/
mutex_lock(&ctx->state_mutex);
- mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM);
+ mask = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM);
spu_release(ctx);
return mask;
@@ -1710,9 +1710,9 @@ static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait)
mask = 0;
if (free_elements & 0xffff)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (tagstatus & ctx->tagwait)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
free_elements, tagstatus, ctx->tagwait);
@@ -2469,7 +2469,7 @@ static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait)
return rc;
if (spufs_switch_log_used(ctx) > 0)
- mask |= POLLIN;
+ mask |= EPOLLIN;
spu_release(ctx);
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
index fff58198b5b6..ae9d24d31eed 100644
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -70,17 +70,17 @@ static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events)
but first mark any pending interrupts as done so
we don't get woken up unnecessarily */
- if (events & (POLLIN | POLLRDNORM)) {
+ if (events & (EPOLLIN | EPOLLRDNORM)) {
if (stat & 0xff0000)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
else {
spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
}
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (events & (EPOLLOUT | EPOLLWRNORM)) {
if (stat & 0x00ff00)
- ret = POLLOUT | POLLWRNORM;
+ ret = EPOLLOUT | EPOLLWRNORM;
else {
spu_int_stat_clear(spu, 2,
CLASS2_MAILBOX_THRESHOLD_INTR);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index c18de0a9b1bd..4070bb4e9da4 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -153,7 +153,7 @@ static __poll_t opal_prd_poll(struct file *file,
poll_wait(file, &opal_prd_msg_wait, wait);
if (!opal_msg_queue_empty())
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ebc244b08d67..d22aeb0b69e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
break;
}
+ /* Do we support single escalation */
+ if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+ xive_has_single_esc = true;
+
/* Configure Thread Management areas for KVM */
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
}
EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
{
s64 rc;
+ u64 flags = OPAL_XIVE_VP_ENABLED;
+ if (single_escalation)
+ flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
for (;;) {
- rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+ rc = opal_xive_set_vp_info(vp_id, flags, 0);
if (rc != OPAL_BUSY)
break;
msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
return 0;
}
EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+ return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 31e400c1a1f3..86e5b2fdee3c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -261,6 +261,11 @@ static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
+static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index a478eb61aaf7..fb56fa3283a2 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -20,7 +20,9 @@ struct css_general_char {
u32 aif_tdd : 1; /* bit 56 */
u32 : 1;
u32 qebsm : 1; /* bit 58 */
- u32 : 8;
+ u32 : 2;
+ u32 aiv : 1; /* bit 61 */
+ u32 : 5;
u32 aif_osa : 1; /* bit 67 */
u32 : 12;
u32 eadm_rf : 1; /* bit 80 */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c1b0a9ac1dc8..afb0f08b8021 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -2,7 +2,7 @@
/*
* definition for kernel virtual machines on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <[email protected]>
*/
@@ -183,6 +183,7 @@ struct kvm_s390_sie_block {
#define ECA_IB 0x40000000
#define ECA_SIGPI 0x10000000
#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
#define ECA_VX 0x00020000
#define ECA_PROTEXCI 0x00002000
#define ECA_SII 0x00000001
@@ -228,7 +229,9 @@ struct kvm_s390_sie_block {
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
- __u8 reserved70[16]; /* 0x0070 */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
__u64 mso; /* 0x0080 */
__u64 msl; /* 0x0088 */
psw_t gpsw; /* 0x0090 */
@@ -317,18 +320,30 @@ struct kvm_vcpu_stat {
u64 deliver_program_int;
u64 deliver_io_int;
u64 exit_wait_state;
+ u64 instruction_epsw;
+ u64 instruction_gs;
+ u64 instruction_io_other;
+ u64 instruction_lpsw;
+ u64 instruction_lpswe;
u64 instruction_pfmf;
+ u64 instruction_ptff;
+ u64 instruction_sck;
+ u64 instruction_sckpf;
u64 instruction_stidp;
u64 instruction_spx;
u64 instruction_stpx;
u64 instruction_stap;
- u64 instruction_storage_key;
+ u64 instruction_iske;
+ u64 instruction_ri;
+ u64 instruction_rrbe;
+ u64 instruction_sske;
u64 instruction_ipte_interlock;
- u64 instruction_stsch;
- u64 instruction_chsc;
u64 instruction_stsi;
u64 instruction_stfl;
+ u64 instruction_tb;
+ u64 instruction_tpi;
u64 instruction_tprot;
+ u64 instruction_tsch;
u64 instruction_sie;
u64 instruction_essa;
u64 instruction_sthyi;
@@ -354,6 +369,7 @@ struct kvm_vcpu_stat {
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
+ u64 diagnose_other;
};
#define PGM_OPERATION 0x01
@@ -410,35 +426,35 @@ struct kvm_vcpu_stat {
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
-/* irq types in order of priority */
+/* irq types in ascend order of priorities */
enum irq_types {
- IRQ_PEND_MCHK_EX = 0,
- IRQ_PEND_SVC,
- IRQ_PEND_PROG,
- IRQ_PEND_MCHK_REP,
- IRQ_PEND_EXT_IRQ_KEY,
- IRQ_PEND_EXT_MALFUNC,
- IRQ_PEND_EXT_EMERGENCY,
- IRQ_PEND_EXT_EXTERNAL,
- IRQ_PEND_EXT_CLOCK_COMP,
- IRQ_PEND_EXT_CPU_TIMER,
- IRQ_PEND_EXT_TIMING,
- IRQ_PEND_EXT_SERVICE,
- IRQ_PEND_EXT_HOST,
- IRQ_PEND_PFAULT_INIT,
- IRQ_PEND_PFAULT_DONE,
- IRQ_PEND_VIRTIO,
- IRQ_PEND_IO_ISC_0,
- IRQ_PEND_IO_ISC_1,
- IRQ_PEND_IO_ISC_2,
- IRQ_PEND_IO_ISC_3,
- IRQ_PEND_IO_ISC_4,
- IRQ_PEND_IO_ISC_5,
- IRQ_PEND_IO_ISC_6,
- IRQ_PEND_IO_ISC_7,
- IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_SET_PREFIX = 0,
IRQ_PEND_RESTART,
- IRQ_PEND_SET_PREFIX,
+ IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_IO_ISC_7,
+ IRQ_PEND_IO_ISC_6,
+ IRQ_PEND_IO_ISC_5,
+ IRQ_PEND_IO_ISC_4,
+ IRQ_PEND_IO_ISC_3,
+ IRQ_PEND_IO_ISC_2,
+ IRQ_PEND_IO_ISC_1,
+ IRQ_PEND_IO_ISC_0,
+ IRQ_PEND_VIRTIO,
+ IRQ_PEND_PFAULT_DONE,
+ IRQ_PEND_PFAULT_INIT,
+ IRQ_PEND_EXT_HOST,
+ IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_TIMING,
+ IRQ_PEND_EXT_CPU_TIMER,
+ IRQ_PEND_EXT_CLOCK_COMP,
+ IRQ_PEND_EXT_EXTERNAL,
+ IRQ_PEND_EXT_EMERGENCY,
+ IRQ_PEND_EXT_MALFUNC,
+ IRQ_PEND_EXT_IRQ_KEY,
+ IRQ_PEND_MCHK_REP,
+ IRQ_PEND_PROG,
+ IRQ_PEND_SVC,
+ IRQ_PEND_MCHK_EX,
IRQ_PEND_COUNT
};
@@ -516,9 +532,6 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
- struct kvm_s390_float_interrupt *float_int;
- struct swait_queue_head *wq;
- atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
unsigned long pending_irqs;
@@ -707,14 +720,50 @@ struct kvm_s390_crypto_cb {
struct kvm_s390_apcb1 apcb1; /* 0x0080 */
};
+struct kvm_s390_gisa {
+ union {
+ struct { /* common to all formats */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01[2];
+ u8 iam;
+ };
+ struct { /* format 0 */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01;
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 iam;
+ u8 reserved02[4];
+ u32 airq_count;
+ } g0;
+ struct { /* format 1 */
+ u32 next_alert;
+ u8 ipm;
+ u8 simm;
+ u8 nimm;
+ u8 iam;
+ u8 aism[8];
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 reserved03[11];
+ u32 airq_count;
+ } g1;
+ };
+};
+
/*
- * sie_page2 has to be allocated as DMA because fac_list and crycb need
- * 31bit addresses in the sie control block.
+ * sie_page2 has to be allocated as DMA because fac_list, crycb and
+ * gisa need 31bit addresses in the sie control block.
*/
struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
- u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
+ struct kvm_s390_gisa gisa; /* 0x0900 */
+ u8 reserved920[0x1000 - 0x920]; /* 0x0920 */
};
struct kvm_s390_vsie {
@@ -761,6 +810,7 @@ struct kvm_arch{
struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+ struct kvm_s390_gisa *gisa;
};
#define KVM_HVA_ERR_BAD (-1UL)
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index d3c1a8a2e3ad..3cae9168f63c 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -77,6 +77,7 @@ struct sclp_info {
unsigned char has_ibs : 1;
unsigned char has_skey : 1;
unsigned char has_kss : 1;
+ unsigned char has_gisaf : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 9a4594e0a1ff..a3dbd459cce9 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 89aa114a2cba..45634b3d2e0a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -257,6 +257,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
case 0x500:
return __diag_virtio_hypercall(vcpu);
default:
+ vcpu->stat.diagnose_other++;
return -EOPNOTSUPP;
}
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 024ad8bcc516..aabf46f5f883 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -36,7 +36,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
{
int c, scn;
- if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+ if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
return 0;
BUG_ON(!kvm_s390_use_sca_entries());
@@ -101,18 +101,17 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
/* another external call is pending */
return -EBUSY;
}
- atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
return 0;
}
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc, expect;
if (!kvm_s390_use_sca_entries())
return;
- atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -190,8 +189,8 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
static inline int is_ioirq(unsigned long irq_type)
{
- return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
- (irq_type <= IRQ_PEND_IO_ISC_7));
+ return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
+ (irq_type <= IRQ_PEND_IO_ISC_0));
}
static uint64_t isc_to_isc_bits(int isc)
@@ -199,25 +198,59 @@ static uint64_t isc_to_isc_bits(int isc)
return (0x80 >> isc) << 24;
}
+static inline u32 isc_to_int_word(u8 isc)
+{
+ return ((u32)isc << 27) | 0x80000000;
+}
+
static inline u8 int_word_to_isc(u32 int_word)
{
return (int_word & 0x38000000) >> 27;
}
+/*
+ * To use atomic bitmap functions, we have to provide a bitmap address
+ * that is u64 aligned. However, the ipm might be u32 aligned.
+ * Therefore, we logically start the bitmap at the very beginning of the
+ * struct and fixup the bit number.
+ */
+#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
+
+static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa)
+{
+ return READ_ONCE(gisa->ipm);
+}
+
+static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ vcpu->arch.local_int.pending_irqs |
+ kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
}
static inline int isc_to_irq_type(unsigned long isc)
{
- return IRQ_PEND_IO_ISC_0 + isc;
+ return IRQ_PEND_IO_ISC_0 - isc;
}
static inline int irq_type_to_isc(unsigned long irq_type)
{
- return irq_type - IRQ_PEND_IO_ISC_0;
+ return IRQ_PEND_IO_ISC_0 - irq_type;
}
static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
@@ -278,20 +311,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
- set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+ set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
- clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+ clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IO_INT | CPUSTAT_EXT_INT |
+ CPUSTAT_STOP_INT);
vcpu->arch.sie_block->lctl = 0x0000;
vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
@@ -302,17 +335,12 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
}
}
-static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
-{
- atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
-}
-
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
return;
else if (psw_ioint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
else
vcpu->arch.sie_block->lctl |= LCTL_CR6;
}
@@ -322,7 +350,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
return;
if (psw_extint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
else
vcpu->arch.sie_block->lctl |= LCTL_CR0;
}
@@ -340,7 +368,7 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
{
if (kvm_s390_is_stop_irq_pending(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
}
/* Set interception request for non-deliverable interrupts */
@@ -897,18 +925,38 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
+{
+ int rc;
+
+ rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, io->io_int_word, (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
unsigned long irq_type)
{
struct list_head *isc_list;
struct kvm_s390_float_interrupt *fi;
struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_io_info io;
+ u32 isc;
int rc = 0;
fi = &vcpu->kvm->arch.float_int;
spin_lock(&fi->lock);
- isc_list = &fi->lists[irq_type_to_isc(irq_type)];
+ isc = irq_type_to_isc(irq_type);
+ isc_list = &fi->lists[isc];
inti = list_first_entry_or_null(isc_list,
struct kvm_s390_interrupt_info,
list);
@@ -936,24 +984,31 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
spin_unlock(&fi->lock);
if (inti) {
- rc = put_guest_lc(vcpu, inti->io.subchannel_id,
- (u16 *)__LC_SUBCHANNEL_ID);
- rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
- (u16 *)__LC_SUBCHANNEL_NR);
- rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
- (u32 *)__LC_IO_INT_PARM);
- rc |= put_guest_lc(vcpu, inti->io.io_int_word,
- (u32 *)__LC_IO_INT_WORD);
- rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
- &vcpu->arch.sie_block->gpsw,
- sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
- &vcpu->arch.sie_block->gpsw,
- sizeof(psw_t));
+ rc = __do_deliver_io(vcpu, &(inti->io));
kfree(inti);
+ goto out;
}
- return rc ? -EFAULT : 0;
+ if (vcpu->kvm->arch.gisa &&
+ kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) {
+ /*
+ * in case an adapter interrupt was not delivered
+ * in SIE context KVM will handle the delivery
+ */
+ VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
+ memset(&io, 0, sizeof(io));
+ io.io_int_word = isc_to_int_word(isc);
+ vcpu->stat.deliver_io_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_IO(1, 0, 0, 0),
+ ((__u32)io.subchannel_id << 16) |
+ io.subchannel_nr,
+ ((__u64)io.io_int_parm << 32) |
+ io.io_int_word);
+ rc = __do_deliver_io(vcpu, &io);
+ }
+out:
+ return rc;
}
typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
@@ -1155,8 +1210,8 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
- /* bits are in the order of interrupt priority */
- irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
+ /* bits are in the reverse order of interrupt priority */
+ irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
if (is_ioirq(irq_type)) {
rc = __deliver_io(vcpu, irq_type);
} else {
@@ -1228,7 +1283,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
li->irq.ext = irq->u.ext;
set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1253,7 +1308,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
*extcall = irq->u.extcall;
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1297,7 +1352,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
return -EBUSY;
stop->flags = irq->u.stop.flags;
- __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
return 0;
}
@@ -1329,7 +1384,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1373,7 +1428,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
0, 0);
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1386,7 +1441,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
0, 0);
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1416,20 +1471,86 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
return NULL;
}
+static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm,
+ u64 isc_mask, u32 schid)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int isc;
+
+ for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+ if (isc_mask & isc_to_isc_bits(isc))
+ inti = get_io_int(kvm, isc, schid);
+ }
+ return inti;
+}
+
+static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid)
+{
+ unsigned long active_mask;
+ int isc;
+
+ if (schid)
+ goto out;
+ if (!kvm->arch.gisa)
+ goto out;
+
+ active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32;
+ while (active_mask) {
+ isc = __fls(active_mask) ^ (BITS_PER_LONG - 1);
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc))
+ return isc;
+ clear_bit_inv(isc, &active_mask);
+ }
+out:
+ return -EINVAL;
+}
+
/*
* Dequeue and return an I/O interrupt matching any of the interruption
* subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ * Take into account the interrupts pending in the interrupt list and in GISA.
+ *
+ * Note that for a guest that does not enable I/O interrupts
+ * but relies on TPI, a flood of classic interrupts may starve
+ * out adapter interrupts on the same isc. Linux does not do
+ * that, and it is possible to work around the issue by configuring
+ * different iscs for classic and adapter interrupts in the guest,
+ * but we may want to revisit this in the future.
*/
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 isc_mask, u32 schid)
{
- struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_interrupt_info *inti, *tmp_inti;
int isc;
- for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
- if (isc_mask & isc_to_isc_bits(isc))
- inti = get_io_int(kvm, isc, schid);
+ inti = get_top_io_int(kvm, isc_mask, schid);
+
+ isc = get_top_gisa_isc(kvm, isc_mask, schid);
+ if (isc < 0)
+ /* no AI in GISA */
+ goto out;
+
+ if (!inti)
+ /* AI in GISA but no classical IO int */
+ goto gisa_out;
+
+ /* both types of interrupts present */
+ if (int_word_to_isc(inti->io.io_int_word) <= isc) {
+ /* classical IO int with higher priority */
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ goto out;
}
+gisa_out:
+ tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (tmp_inti) {
+ tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ tmp_inti->io.io_int_word = isc_to_int_word(isc);
+ if (inti)
+ kvm_s390_reinject_io_int(kvm, inti);
+ inti = tmp_inti;
+ } else
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+out:
return inti;
}
@@ -1517,6 +1638,15 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
struct list_head *list;
int isc;
+ isc = int_word_to_isc(inti->io.io_int_word);
+
+ if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ kfree(inti);
+ return 0;
+ }
+
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
@@ -1532,7 +1662,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
inti->io.subchannel_id >> 8,
inti->io.subchannel_id >> 1 & 0x3,
inti->io.subchannel_nr);
- isc = int_word_to_isc(inti->io.io_int_word);
list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
list_add_tail(&inti->list, list);
set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
@@ -1546,7 +1675,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li;
struct kvm_vcpu *dst_vcpu;
int sigcpu, online_vcpus, nr_tries = 0;
@@ -1568,20 +1696,17 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
/* make the VCPU drop out of the SIE, or wake it up if sleeping */
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
switch (type) {
case KVM_S390_MCHK:
- atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- atomic_or(CPUSTAT_IO_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
break;
}
- spin_unlock(&li->lock);
kvm_s390_vcpu_wakeup(dst_vcpu);
}
@@ -1820,6 +1945,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
for (i = 0; i < FIRQ_MAX_COUNT; i++)
fi->counters[i] = 0;
spin_unlock(&fi->lock);
+ kvm_s390_gisa_clear(kvm);
};
static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
@@ -1847,6 +1973,22 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
max_irqs = len / sizeof(struct kvm_s390_irq);
+ if (kvm->arch.gisa &&
+ kvm_s390_gisa_get_ipm(kvm->arch.gisa)) {
+ for (i = 0; i <= MAX_ISC; i++) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out_nolock;
+ }
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) {
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ irq->u.io.io_int_word = isc_to_int_word(i);
+ n++;
+ }
+ }
+ }
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++) {
@@ -1885,6 +2027,7 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
out:
spin_unlock(&fi->lock);
+out_nolock:
if (!ret && n > 0) {
if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
ret = -EFAULT;
@@ -2245,7 +2388,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_IO(1, 0, 0, 0),
.parm = 0,
- .parm64 = (adapter->isc << 27) | 0x80000000,
+ .parm64 = isc_to_int_word(adapter->isc),
};
int ret = 0;
@@ -2687,3 +2830,28 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
return n;
}
+
+void kvm_s390_gisa_clear(struct kvm *kvm)
+{
+ if (kvm->arch.gisa) {
+ memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa));
+ kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa);
+ }
+}
+
+void kvm_s390_gisa_init(struct kvm *kvm)
+{
+ if (css_general_characteristics.aiv) {
+ kvm->arch.gisa = &kvm->arch.sie_page2->gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa);
+ kvm_s390_gisa_clear(kvm);
+ }
+}
+
+void kvm_s390_gisa_destroy(struct kvm *kvm)
+{
+ if (!kvm->arch.gisa)
+ return;
+ kvm->arch.gisa = NULL;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1371dff2b90d..ba4c7092335a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2017
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <[email protected]>
* Christian Borntraeger <[email protected]>
@@ -87,19 +87,31 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_epsw", VCPU_STAT(instruction_epsw) },
+ { "instruction_gs", VCPU_STAT(instruction_gs) },
+ { "instruction_io_other", VCPU_STAT(instruction_io_other) },
+ { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
+ { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
+ { "instruction_ptff", VCPU_STAT(instruction_ptff) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+ { "instruction_sck", VCPU_STAT(instruction_sck) },
+ { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
- { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_iske", VCPU_STAT(instruction_iske) },
+ { "instruction_ri", VCPU_STAT(instruction_ri) },
+ { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
+ { "instruction_sske", VCPU_STAT(instruction_sske) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
- { "instruction_stsch", VCPU_STAT(instruction_stsch) },
- { "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_tb", VCPU_STAT(instruction_tb) },
+ { "instruction_tpi", VCPU_STAT(instruction_tpi) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_tsch", VCPU_STAT(instruction_tsch) },
{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sie", VCPU_STAT(instruction_sie) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
@@ -118,12 +130,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
- { "diagnose_10", VCPU_STAT(diagnose_10) },
- { "diagnose_44", VCPU_STAT(diagnose_44) },
- { "diagnose_9c", VCPU_STAT(diagnose_9c) },
- { "diagnose_258", VCPU_STAT(diagnose_258) },
- { "diagnose_308", VCPU_STAT(diagnose_308) },
- { "diagnose_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_10", VCPU_STAT(diagnose_10) },
+ { "instruction_diag_44", VCPU_STAT(diagnose_44) },
+ { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+ { "instruction_diag_258", VCPU_STAT(diagnose_258) },
+ { "instruction_diag_308", VCPU_STAT(diagnose_308) },
+ { "instruction_diag_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_other", VCPU_STAT(diagnose_other) },
{ NULL }
};
@@ -576,7 +589,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
case KVM_CAP_S390_GS:
r = -EINVAL;
mutex_lock(&kvm->lock);
- if (atomic_read(&kvm->online_vcpus)) {
+ if (kvm->created_vcpus) {
r = -EBUSY;
} else if (test_facility(133)) {
set_kvm_facility(kvm->arch.model.fac_mask, 133);
@@ -1088,7 +1101,6 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_feat data;
- int ret = -EBUSY;
if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
return -EFAULT;
@@ -1098,13 +1110,18 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
return -EINVAL;
mutex_lock(&kvm->lock);
- if (!atomic_read(&kvm->online_vcpus)) {
- bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
- KVM_S390_VM_CPU_FEAT_NR_BITS);
- ret = 0;
+ if (kvm->created_vcpus) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
}
+ bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
mutex_unlock(&kvm->lock);
- return ret;
+ VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
+ return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
@@ -1206,6 +1223,10 @@ static int kvm_s390_get_processor_feat(struct kvm *kvm,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
return 0;
}
@@ -1219,6 +1240,10 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
return 0;
}
@@ -1911,6 +1936,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
+ BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
kvm->arch.sie_page2 =
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.sie_page2)
@@ -1981,6 +2007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2043,6 +2070,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_vcpus(kvm);
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
+ kvm_s390_gisa_destroy(kvm);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2314,7 +2342,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
gmap_enable(vcpu->arch.enabled_gmap);
- atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
vcpu->cpu = cpu;
@@ -2325,7 +2353,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
- atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
vcpu->arch.enabled_gmap = gmap_get_enabled();
gmap_disable(vcpu->arch.enabled_gmap);
@@ -2422,9 +2450,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
CPUSTAT_STOPPED);
if (test_kvm_facility(vcpu->kvm, 78))
- atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
else if (test_kvm_facility(vcpu->kvm, 8))
- atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
kvm_s390_vcpu_setup_model(vcpu);
@@ -2456,12 +2484,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF;
+ if (vcpu->arch.sie_block->gd) {
+ vcpu->arch.sie_block->eca |= ECA_AIV;
+ VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+ vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+ }
vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
| SDNXC;
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
if (sclp.has_kss)
- atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
else
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
@@ -2508,9 +2541,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->icpua = id;
spin_lock_init(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- vcpu->arch.local_int.wq = &vcpu->wq;
- vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
+ if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
+ vcpu->arch.sie_block->gd |= GISA_FORMAT1;
seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
@@ -2567,7 +2600,7 @@ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
- atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
@@ -2720,47 +2753,70 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ vcpu_load(vcpu);
+
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ vcpu_load(vcpu);
+
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- if (test_fp_ctl(fpu->fpc))
- return -EINVAL;
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
+ if (test_fp_ctl(fpu->fpc)) {
+ ret = -EINVAL;
+ goto out;
+ }
vcpu->run->s.regs.fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
(freg_t *) fpu->fprs);
else
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- return 0;
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ vcpu_load(vcpu);
+
/* make sure we have the latest values */
save_fpu_regs();
if (MACHINE_HAS_VX)
@@ -2769,6 +2825,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
else
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->run->s.regs.fpc;
+
+ vcpu_put(vcpu);
return 0;
}
@@ -2800,41 +2858,56 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
int rc = 0;
+ vcpu_load(vcpu);
+
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- if (dbg->control & ~VALID_GUESTDBG_FLAGS)
- return -EINVAL;
- if (!sclp.has_gpere)
- return -EINVAL;
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
+ rc = -EINVAL;
+ goto out;
+ }
+ if (!sclp.has_gpere) {
+ rc = -EINVAL;
+ goto out;
+ }
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
- atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
} else {
- atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
vcpu->arch.guestdbg.last_bp = 0;
}
if (rc) {
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
}
+out:
+ vcpu_put(vcpu);
return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
/* CHECK_STOP and LOAD are not supported yet */
- return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
- KVM_MP_STATE_OPERATING;
+ ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+ KVM_MP_STATE_OPERATING;
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
@@ -2842,6 +2915,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int rc = 0;
+ vcpu_load(vcpu);
+
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;
@@ -2859,12 +2934,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
rc = -ENXIO;
}
+ vcpu_put(vcpu);
return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
@@ -2900,8 +2976,7 @@ retry:
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
- atomic_or(CPUSTAT_IBS,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
}
goto retry;
}
@@ -2909,8 +2984,7 @@ retry:
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
- atomic_andnot(CPUSTAT_IBS,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
}
goto retry;
}
@@ -3390,9 +3464,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (kvm_run->immediate_exit)
return -EINTR;
+ vcpu_load(vcpu);
+
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);
- return 0;
+ rc = 0;
+ goto out;
}
kvm_sigset_activate(vcpu);
@@ -3402,7 +3479,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
sync_regs(vcpu, kvm_run);
@@ -3432,6 +3510,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_deactivate(vcpu);
vcpu->stat.exit_userspace++;
+out:
+ vcpu_put(vcpu);
return rc;
}
@@ -3560,7 +3640,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
__disable_ibs_on_all_vcpus(vcpu->kvm);
}
- atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
@@ -3586,7 +3666,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
- atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -3693,36 +3773,45 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- int idx;
- long r;
switch (ioctl) {
case KVM_S390_IRQ: {
struct kvm_s390_irq s390irq;
- r = -EFAULT;
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
- break;
- r = kvm_s390_inject_vcpu(vcpu, &s390irq);
- break;
+ return -EFAULT;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
struct kvm_s390_irq s390irq;
- r = -EFAULT;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
- break;
+ return -EFAULT;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
- r = kvm_s390_inject_vcpu(vcpu, &s390irq);
- break;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ }
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int idx;
+ long r;
+
+ vcpu_load(vcpu);
+
+ switch (ioctl) {
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
@@ -3847,6 +3936,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
default:
r = -ENOTTY;
}
+
+ vcpu_put(vcpu);
return r;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 5e46ba429bcb..bd31b37b0e6f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -47,14 +47,29 @@ do { \
d_args); \
} while (0)
+static inline void kvm_s390_set_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_or(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline void kvm_s390_clear_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_andnot(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline bool kvm_s390_test_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ return (atomic_read(&vcpu->arch.sie_block->cpuflags) & flags) == flags;
+}
+
static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED);
}
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
{
- return test_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -367,6 +382,9 @@ int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
void __user *buf, int len);
int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
__u8 __user *buf, int len);
+void kvm_s390_gisa_init(struct kvm *kvm);
+void kvm_s390_gisa_clear(struct kvm *kvm);
+void kvm_s390_gisa_destroy(struct kvm *kvm);
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0714bfa56da0..c4c4e157c036 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2013
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <[email protected]>
* Christian Borntraeger <[email protected]>
@@ -34,6 +34,8 @@
static int handle_ri(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_ri++;
+
if (test_kvm_facility(vcpu->kvm, 64)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
vcpu->arch.sie_block->ecb3 |= ECB3_RI;
@@ -53,6 +55,8 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
static int handle_gs(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_gs++;
+
if (test_kvm_facility(vcpu->kvm, 133)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
preempt_disable();
@@ -85,6 +89,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
u8 ar;
u64 op2, val;
+ vcpu->stat.instruction_sck++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -203,14 +209,14 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
trace_kvm_s390_skey_related_inst(vcpu);
if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
- !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS))
+ !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
return rc;
rc = s390_enable_skey();
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
if (!rc) {
- if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)
- atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags);
+ if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
else
sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
ICTL_RRBE);
@@ -222,7 +228,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
{
int rc;
- vcpu->stat.instruction_storage_key++;
rc = kvm_s390_skey_check_enable(vcpu);
if (rc)
return rc;
@@ -242,6 +247,8 @@ static int handle_iske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_iske++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -274,6 +281,8 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_rrbe++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -312,6 +321,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_sske++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -392,6 +403,8 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
gpa_t addr;
int reg2;
+ vcpu->stat.instruction_tb++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -424,6 +437,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
u64 addr;
u8 ar;
+ vcpu->stat.instruction_tpi++;
+
addr = kvm_s390_get_base_disp_s(vcpu, &ar);
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -484,6 +499,8 @@ static int handle_tsch(struct kvm_vcpu *vcpu)
struct kvm_s390_interrupt_info *inti = NULL;
const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+ vcpu->stat.instruction_tsch++;
+
/* a valid schid has at least one bit set */
if (vcpu->run->s.regs.gprs[1])
inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
@@ -527,6 +544,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->ipa == 0xb235)
return handle_tsch(vcpu);
/* Handle in userspace. */
+ vcpu->stat.instruction_io_other++;
return -EOPNOTSUPP;
} else {
/*
@@ -592,6 +610,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
int rc;
u8 ar;
+ vcpu->stat.instruction_lpsw++;
+
if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -619,6 +639,8 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
int rc;
u8 ar;
+ vcpu->stat.instruction_lpswe++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -828,6 +850,8 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
{
int reg1, reg2;
+ vcpu->stat.instruction_epsw++;
+
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
@@ -1332,6 +1356,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
{
u32 value;
+ vcpu->stat.instruction_sckpf++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -1347,6 +1373,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
static int handle_ptff(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_ptff++;
+
/* we don't emulate any control instructions yet */
kvm_s390_set_psw_cc(vcpu, 3);
return 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index c1f5cde2c878..683036c1c92a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,22 +20,18 @@
static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
u64 *reg)
{
- struct kvm_s390_local_interrupt *li;
- int cpuflags;
+ const bool stopped = kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED);
int rc;
int ext_call_pending;
- li = &dst_vcpu->arch.local_int;
-
- cpuflags = atomic_read(li->cpuflags);
ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
- if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending)
+ if (!stopped && !ext_call_pending)
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
if (ext_call_pending)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
- if (cpuflags & CPUSTAT_STOPPED)
+ if (stopped)
*reg |= SIGP_STATUS_STOPPED;
rc = SIGP_CC_STATUS_STORED;
}
@@ -208,11 +204,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu,
u32 addr, u64 *reg)
{
- int flags;
int rc;
- flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
- if (!(flags & CPUSTAT_STOPPED)) {
+ if (!kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
return SIGP_CC_STATUS_STORED;
@@ -231,7 +225,6 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
static int __sigp_sense_running(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu, u64 *reg)
{
- struct kvm_s390_local_interrupt *li;
int rc;
if (!test_kvm_facility(vcpu->kvm, 9)) {
@@ -240,8 +233,7 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
return SIGP_CC_STATUS_STORED;
}
- li = &dst_vcpu->arch.local_int;
- if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ if (kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_RUNNING)) {
/* running */
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
} else {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 751348348477..ec772700ff96 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -28,13 +28,23 @@ struct vsie_page {
* the same offset as that in struct sie_page!
*/
struct mcck_volatile_info mcck_info; /* 0x0200 */
- /* the pinned originial scb */
+ /*
+ * The pinned original scb. Be aware that other VCPUs can modify
+ * it while we read from it. Values that are used for conditions or
+ * are reused conditionally, should be accessed via READ_ONCE.
+ */
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
struct gmap *gmap; /* 0x0220 */
/* address of the last reported fault to guest2 */
unsigned long fault_addr; /* 0x0228 */
- __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
+ /* calculated guest addresses of satellite control blocks */
+ gpa_t sca_gpa; /* 0x0230 */
+ gpa_t itdba_gpa; /* 0x0238 */
+ gpa_t gvrd_gpa; /* 0x0240 */
+ gpa_t riccbd_gpa; /* 0x0248 */
+ gpa_t sdnx_gpa; /* 0x0250 */
+ __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
@@ -140,12 +150,13 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
- u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U;
+ const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
+ const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
scb_s->crycbd = 0;
- if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
+ if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
return 0;
/* format-1 is supported with message-security-assist extension 3 */
if (!test_kvm_facility(vcpu->kvm, 76))
@@ -183,12 +194,15 @@ static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_ibc = scb_o->ibc;
+ const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
scb_s->ibc = 0;
/* ibc installed in g2 and requested for g3 */
- if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) {
- scb_s->ibc = scb_o->ibc & 0x0fffU;
+ if (vcpu->kvm->arch.model.ibc && new_ibc) {
+ scb_s->ibc = new_ibc;
/* takte care of the minimum ibc level of the machine */
if (scb_s->ibc < min_ibc)
scb_s->ibc = min_ibc;
@@ -259,6 +273,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_prefix = scb_o->prefix;
+ const uint32_t new_prefix = READ_ONCE(__new_prefix);
+ const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
bool had_tx = scb_s->ecb & ECB_TE;
unsigned long new_mso = 0;
int rc;
@@ -306,14 +324,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->icpua = scb_o->icpua;
if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
- new_mso = scb_o->mso & 0xfffffffffff00000UL;
+ new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
/* if the hva of the prefix changes, we have to remap the prefix */
- if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix)
+ if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
prefix_unmapped(vsie_page);
/* SIE will do mso/msl validity and exception checks for us */
scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
scb_s->mso = new_mso;
- scb_s->prefix = scb_o->prefix;
+ scb_s->prefix = new_prefix;
/* We have to definetly flush the tlb if this scb never ran */
if (scb_s->ihcpu != 0xffffU)
@@ -325,11 +343,11 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
/* transactional execution */
- if (test_kvm_facility(vcpu->kvm, 73)) {
+ if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
/* remap the prefix is tx is toggled on */
- if ((scb_o->ecb & ECB_TE) && !had_tx)
+ if (!had_tx)
prefix_unmapped(vsie_page);
- scb_s->ecb |= scb_o->ecb & ECB_TE;
+ scb_s->ecb |= ECB_TE;
}
/* branch prediction */
if (test_kvm_facility(vcpu->kvm, 82))
@@ -473,46 +491,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
- struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
hpa_t hpa;
- gpa_t gpa;
hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
if (hpa) {
- gpa = scb_o->scaol & ~0xfUL;
- if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
- gpa |= (u64) scb_o->scaoh << 32;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
+ vsie_page->sca_gpa = 0;
scb_s->scaol = 0;
scb_s->scaoh = 0;
}
hpa = scb_s->itdba;
if (hpa) {
- gpa = scb_o->itdba & ~0xffUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
+ vsie_page->itdba_gpa = 0;
scb_s->itdba = 0;
}
hpa = scb_s->gvrd;
if (hpa) {
- gpa = scb_o->gvrd & ~0x1ffUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
+ vsie_page->gvrd_gpa = 0;
scb_s->gvrd = 0;
}
hpa = scb_s->riccbd;
if (hpa) {
- gpa = scb_o->riccbd & ~0x3fUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
+ vsie_page->riccbd_gpa = 0;
scb_s->riccbd = 0;
}
hpa = scb_s->sdnxo;
if (hpa) {
- gpa = scb_o->sdnxo;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
+ vsie_page->sdnx_gpa = 0;
scb_s->sdnxo = 0;
}
}
@@ -539,9 +553,9 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
gpa_t gpa;
int rc = 0;
- gpa = scb_o->scaol & ~0xfUL;
+ gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
- gpa |= (u64) scb_o->scaoh << 32;
+ gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
if (gpa) {
if (!(gpa & ~0x1fffUL))
rc = set_validity_icpt(scb_s, 0x0038U);
@@ -557,11 +571,12 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
if (rc)
goto unpin;
+ vsie_page->sca_gpa = gpa;
scb_s->scaoh = (u32)((u64)hpa >> 32);
scb_s->scaol = (u32)(u64)hpa;
}
- gpa = scb_o->itdba & ~0xffUL;
+ gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
if (gpa && (scb_s->ecb & ECB_TE)) {
if (!(gpa & ~0x1fffU)) {
rc = set_validity_icpt(scb_s, 0x0080U);
@@ -573,10 +588,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
}
+ vsie_page->itdba_gpa = gpa;
scb_s->itdba = hpa;
}
- gpa = scb_o->gvrd & ~0x1ffUL;
+ gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x1310U);
@@ -591,10 +607,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x1310U);
goto unpin;
}
+ vsie_page->gvrd_gpa = gpa;
scb_s->gvrd = hpa;
}
- gpa = scb_o->riccbd & ~0x3fUL;
+ gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
if (gpa && (scb_s->ecb3 & ECB3_RI)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0043U);
@@ -607,13 +624,14 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
goto unpin;
}
/* Validity 0x0044 will be checked by SIE */
+ vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa;
}
if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
unsigned long sdnxc;
- gpa = scb_o->sdnxo & ~0xfUL;
- sdnxc = scb_o->sdnxo & 0xfUL;
+ gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
+ sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
if (!gpa || !(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x10b0U);
goto unpin;
@@ -634,6 +652,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x10b0U);
goto unpin;
}
+ vsie_page->sdnx_gpa = gpa;
scb_s->sdnxo = hpa | sdnxc;
}
return 0;
@@ -778,7 +797,7 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- __u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
retry_vsie_icpt(vsie_page);
@@ -904,7 +923,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
* External calls have to lead to a kick of the vcpu and
* therefore the vsie -> Simulate Wait state.
*/
- atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
/*
* We have to adjust the g3 epoch by the g2 epoch. The epoch will
* automatically be adjusted on tod clock changes via kvm_sync_clock.
@@ -926,7 +945,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
*/
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 05d459b638f5..2c55a2b9d6c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -815,27 +815,17 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
* @ptl: pointer to the spinlock pointer
*
* Returns a pointer to the locked pte for a guest address, or NULL
- *
- * Note: Can also be called for shadow gmaps.
*/
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
spinlock_t **ptl)
{
unsigned long *table;
- if (gmap_is_shadow(gmap))
- spin_lock(&gmap->guest_table_lock);
+ BUG_ON(gmap_is_shadow(gmap));
/* Walk the gmap page table, lock and get pte pointer */
table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
- if (!table || *table & _SEGMENT_ENTRY_INVALID) {
- if (gmap_is_shadow(gmap))
- spin_unlock(&gmap->guest_table_lock);
+ if (!table || *table & _SEGMENT_ENTRY_INVALID)
return NULL;
- }
- if (gmap_is_shadow(gmap)) {
- *ptl = &gmap->guest_table_lock;
- return pte_offset_map((pmd_t *) table, gaddr);
- }
return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}
@@ -889,8 +879,6 @@ static void gmap_pte_op_end(spinlock_t *ptl)
* -EFAULT if gaddr is invalid (or mapping for shadows is missing).
*
* Called with sg->mm->mmap_sem in read.
- *
- * Note: Can also be called for shadow gmaps.
*/
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
@@ -900,6 +888,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
pte_t *ptep;
int rc;
+ BUG_ON(gmap_is_shadow(gmap));
while (len) {
rc = -EAGAIN;
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
@@ -960,7 +949,8 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
* @val: pointer to the unsigned long value to return
*
* Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
- * if reading using the virtual address failed.
+ * if reading using the virtual address failed. -EINVAL if called on a gmap
+ * shadow.
*
* Called with gmap->mm->mmap_sem in read.
*/
@@ -971,6 +961,9 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
pte_t *ptep, pte;
int rc;
+ if (gmap_is_shadow(gmap))
+ return -EINVAL;
+
while (1) {
rc = -EAGAIN;
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
@@ -1028,18 +1021,17 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
}
/**
- * gmap_protect_rmap - modify access rights to memory and create an rmap
+ * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
* @sg: pointer to the shadow guest address space structure
* @raddr: rmap address in the shadow gmap
* @paddr: address in the parent guest address space
* @len: length of the memory area to protect
- * @prot: indicates access rights: none, read-only or read-write
*
* Returns 0 if successfully protected and the rmap was created, -ENOMEM
* if out of memory and -EFAULT if paddr is invalid.
*/
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
- unsigned long paddr, unsigned long len, int prot)
+ unsigned long paddr, unsigned long len)
{
struct gmap *parent;
struct gmap_rmap *rmap;
@@ -1067,7 +1059,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
ptep = gmap_pte_op_walk(parent, paddr, &ptl);
if (ptep) {
spin_lock(&sg->guest_table_lock);
- rc = ptep_force_prot(parent->mm, paddr, ptep, prot,
+ rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
PGSTE_VSIE_BIT);
if (!rc)
gmap_insert_rmap(sg, vmaddr, rmap);
@@ -1077,7 +1069,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
radix_tree_preload_end();
if (rc) {
kfree(rmap);
- rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+ rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
if (rc)
return rc;
continue;
@@ -1616,7 +1608,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
origin = r2t & _REGION_ENTRY_ORIGIN;
offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 4);
@@ -1699,7 +1691,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
origin = r3t & _REGION_ENTRY_ORIGIN;
offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 3);
@@ -1783,7 +1775,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
origin = sgt & _REGION_ENTRY_ORIGIN;
offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 2);
@@ -1902,7 +1894,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* Make pgt read-only in parent gmap page table (not the pgste) */
raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
- rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 1);
@@ -2005,7 +1997,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
* Called with sg->parent->shadow_lock.
*/
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
- unsigned long gaddr, pte_t *pte)
+ unsigned long gaddr)
{
struct gmap_rmap *rmap, *rnext, *head;
unsigned long start, end, bits, raddr;
@@ -2090,7 +2082,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
spin_lock(&gmap->shadow_lock);
list_for_each_entry_safe(sg, next,
&gmap->children, list)
- gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+ gmap_shadow_notify(sg, vmaddr, gaddr);
spin_unlock(&gmap->shadow_lock);
}
if (bits & PGSTE_IN_BIT)
diff --git a/arch/sparc/include/uapi/asm/poll.h b/arch/sparc/include/uapi/asm/poll.h
index 2a81e79aa3ea..72356c999125 100644
--- a/arch/sparc/include/uapi/asm/poll.h
+++ b/arch/sparc/include/uapi/asm/poll.h
@@ -2,31 +2,11 @@
#ifndef __SPARC_POLL_H
#define __SPARC_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#define POLLMSG (__force __poll_t)512
-#define POLLREMOVE (__force __poll_t)1024
-#define POLLRDHUP (__force __poll_t)2048
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2, bit 13 -> bit 11 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6) |
- ((v & 0x2000) >> 2);
-
-
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6) | ((v & 0x800) << 2));
-}
-#endif
+#define POLLWRBAND 256
+#define POLLMSG 512
+#define POLLREMOVE 1024
+#define POLLRDHUP 2048
#include <asm-generic/poll.h>
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index abee6d2b9311..16c2c022540d 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -900,6 +900,9 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
hyperv_vector_handler)
+BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
+ hyperv_reenlightenment_intr)
+
#endif /* CONFIG_HYPERV */
ENTRY(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4a9bef6aca34..30c8c5344c4a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1136,6 +1136,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
#if IS_ENABLED(CONFIG_HYPERV)
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
+
+apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
+ hyperv_reenlightenment_vector hyperv_reenlightenment_intr
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a0a206556919..2edc49e7409b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -18,6 +18,8 @@
*/
#include <linux/types.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
#include <asm/hypervisor.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
@@ -37,6 +39,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return tsc_pg;
}
+EXPORT_SYMBOL_GPL(hv_get_tsc_page);
static u64 read_hv_clock_tsc(struct clocksource *arg)
{
@@ -101,6 +104,115 @@ static int hv_cpu_init(unsigned int cpu)
return 0;
}
+static void (*hv_reenlightenment_cb)(void);
+
+static void hv_reenlightenment_notify(struct work_struct *dummy)
+{
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ /* Don't issue the callback if TSC accesses are not emulated */
+ if (hv_reenlightenment_cb && emu_status.inprogress)
+ hv_reenlightenment_cb();
+}
+static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
+
+void hyperv_stop_tsc_emulation(void)
+{
+ u64 freq;
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ emu_status.inprogress = 0;
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+ tsc_khz = div64_u64(freq, 1000);
+}
+EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
+
+static inline bool hv_reenlightenment_available(void)
+{
+ /*
+ * Check for required features and priviliges to make TSC frequency
+ * change notifications work.
+ */
+ return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE &&
+ ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT;
+}
+
+__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs)
+{
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_reenlightenment_count);
+
+ schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
+
+ exiting_irq();
+}
+
+void set_hv_tscchange_cb(void (*cb)(void))
+{
+ struct hv_reenlightenment_control re_ctrl = {
+ .vector = HYPERV_REENLIGHTENMENT_VECTOR,
+ .enabled = 1,
+ .target_vp = hv_vp_index[smp_processor_id()]
+ };
+ struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
+
+ if (!hv_reenlightenment_available()) {
+ pr_warn("Hyper-V: reenlightenment support is unavailable\n");
+ return;
+ }
+
+ hv_reenlightenment_cb = cb;
+
+ /* Make sure callback is registered before we write to MSRs */
+ wmb();
+
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
+}
+EXPORT_SYMBOL_GPL(set_hv_tscchange_cb);
+
+void clear_hv_tscchange_cb(void)
+{
+ struct hv_reenlightenment_control re_ctrl;
+
+ if (!hv_reenlightenment_available())
+ return;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+ re_ctrl.enabled = 0;
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+
+ hv_reenlightenment_cb = NULL;
+}
+EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb);
+
+static int hv_cpu_die(unsigned int cpu)
+{
+ struct hv_reenlightenment_control re_ctrl;
+ unsigned int new_cpu;
+
+ if (hv_reenlightenment_cb == NULL)
+ return 0;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ if (re_ctrl.target_vp == hv_vp_index[cpu]) {
+ /* Reassign to some other online CPU */
+ new_cpu = cpumask_any_but(cpu_online_mask, cpu);
+
+ re_ctrl.target_vp = hv_vp_index[new_cpu];
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ }
+
+ return 0;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -110,12 +222,19 @@ static int hv_cpu_init(unsigned int cpu)
*/
void hyperv_init(void)
{
- u64 guest_id;
+ u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
+ /* Absolutely required MSRs */
+ required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE |
+ HV_X64_MSR_VP_INDEX_AVAILABLE;
+
+ if ((ms_hyperv.features & required_msrs) != required_msrs)
+ return;
+
/* Allocate percpu VP index */
hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
GFP_KERNEL);
@@ -123,7 +242,7 @@ void hyperv_init(void)
return;
if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
- hv_cpu_init, NULL) < 0)
+ hv_cpu_init, hv_cpu_die) < 0)
goto free_vp_index;
/*
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1d9199e1c2ad..0dfe4d3f74e2 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -210,6 +210,7 @@
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 51cc979dd364..7c341a74ec8c 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -38,6 +38,9 @@ typedef struct {
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
unsigned int irq_hv_callback_count;
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ unsigned int irq_hv_reenlightenment_count;
+#endif
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 67421f649cfa..e71c1120426b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -103,7 +103,12 @@
#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
-#define LOCAL_TIMER_VECTOR 0xee
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
+#endif
+
+#define LOCAL_TIMER_VECTOR 0xed
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 516798431328..dd6f57a54a26 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -86,7 +86,7 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -504,6 +504,7 @@ struct kvm_vcpu_arch {
int mp_state;
u64 ia32_misc_enable_msr;
u64 smbase;
+ u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
@@ -760,6 +761,15 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
+};
+
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -847,6 +857,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+
+ struct kvm_sev_info sev_info;
};
struct kvm_vm_stat {
@@ -883,7 +895,6 @@ struct kvm_vcpu_stat {
u64 request_irq_exits;
u64 irq_exits;
u64 host_state_reload;
- u64 efer_reload;
u64 fpu_reload;
u64 insn_emulation;
u64 insn_emulation_fail;
@@ -965,7 +976,7 @@ struct kvm_x86_ops {
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*tlb_flush)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1017,6 +1028,7 @@ struct kvm_x86_ops {
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
+ bool (*umip_emulated)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
@@ -1079,6 +1091,10 @@ struct kvm_x86_ops {
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+
+ int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
+ int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index b52af150cbd8..25283f7eb299 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -160,6 +160,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val)
void hyperv_callback_vector(void);
+void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
@@ -316,18 +317,27 @@ void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
bool hv_is_hyperv_initialized(void);
void hyperv_cleanup(void);
+
+void hyperv_reenlightenment_intr(struct pt_regs *regs);
+void set_hv_tscchange_cb(void (*cb)(void));
+void clear_hv_tscchange_cb(void);
+void hyperv_stop_tsc_emulation(void);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
+static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
+static inline void clear_hv_tscchange_cb(void) {}
+static inline void hyperv_stop_tsc_emulation(void) {};
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
{
- u64 scale, offset, cur_tsc;
+ u64 scale, offset;
u32 sequence;
/*
@@ -358,7 +368,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
- cur_tsc = rdtsc_ordered();
+ *cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
@@ -368,7 +378,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
- return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
+ return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
+}
+
+static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 cur_tsc;
+
+ return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
}
#else
@@ -376,5 +393,12 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
+
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
+{
+ BUG();
+ return U64_MAX;
+}
#endif
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e520a1e6fc11..c9084dedfcfa 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -397,6 +397,8 @@
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 8a3ee355b422..92015c65fa2a 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -22,4 +22,6 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end,
void io_free_memtype(resource_size_t start, resource_size_t end);
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 78dd9df88157..0487ac054870 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -146,6 +146,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+
struct __attribute__ ((__packed__)) vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 1a5bfead93b4..197c2e6c7376 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -40,6 +40,9 @@
*/
#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11)
+/* AccessReenlightenmentControls privilege */
+#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+
/*
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
@@ -234,6 +237,30 @@
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+/* TSC emulation after migration */
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+
+struct hv_reenlightenment_control {
+ u64 vector:8;
+ u64 reserved1:8;
+ u64 enabled:1;
+ u64 reserved2:15;
+ u64 target_vp:32;
+};
+
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+
+struct hv_tsc_emulation_control {
+ u64 enabled:1;
+ u64 reserved:63;
+};
+
+struct hv_tsc_emulation_status {
+ u64 inprogress:1;
+ u64 reserved:63;
+};
+
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 09cc06483bed..7a2ade4aa235 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -25,6 +25,7 @@
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
+#define KVM_FEATURE_PV_TLB_FLUSH 9
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -51,6 +52,9 @@ struct kvm_steal_time {
__u32 pad[11];
};
+#define KVM_VCPU_PREEMPTED (1 << 0)
+#define KVM_VCPU_FLUSH_TLB (1 << 1)
+
#define KVM_CLOCK_PAIRING_WALLCLOCK 0
struct kvm_clock_pairing {
__s64 sec;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index dc0ca8e29c75..dfcbe6924eaf 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1515,7 +1515,7 @@ static __poll_t do_poll(struct file *fp, poll_table *wait)
return 0;
poll_wait(fp, &apm_waitqueue, wait);
if (!queue_empty(as))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ea831c858195..5bddbdcbc4a3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -556,6 +556,51 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
}
+static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ /*
+ * BIOS support is required for SME and SEV.
+ * For SME: If BIOS has enabled SME then adjust x86_phys_bits by
+ * the SME physical address space reduction value.
+ * If BIOS has not enabled SME then don't advertise the
+ * SME feature (set in scattered.c).
+ * For SEV: If BIOS has not enabled SEV then don't advertise the
+ * SEV feature (set in scattered.c).
+ *
+ * In all cases, since support for SME and SEV requires long mode,
+ * don't advertise the feature under CONFIG_X86_32.
+ */
+ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
+ /* Check if memory encryption is enabled */
+ rdmsrl(MSR_K8_SYSCFG, msr);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ goto clear_all;
+
+ /*
+ * Always adjust physical address bits. Even though this
+ * will be a value above 32-bits this is still done for
+ * CONFIG_X86_32 so that accurate values are reported.
+ */
+ c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ goto clear_all;
+
+ rdmsrl(MSR_K7_HWCR, msr);
+ if (!(msr & MSR_K7_HWCR_SMMLOCK))
+ goto clear_sev;
+
+ return;
+
+clear_all:
+ clear_cpu_cap(c, X86_FEATURE_SME);
+clear_sev:
+ clear_cpu_cap(c, X86_FEATURE_SEV);
+ }
+}
+
static void early_init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
@@ -627,26 +672,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
- /*
- * BIOS support is required for SME. If BIOS has enabled SME then
- * adjust x86_phys_bits by the SME physical address space reduction
- * value. If BIOS has not enabled SME then don't advertise the
- * feature (set in scattered.c). Also, since the SME support requires
- * long mode, don't advertise the feature under CONFIG_X86_32.
- */
- if (cpu_has(c, X86_FEATURE_SME)) {
- u64 msr;
-
- /* Check if SME is enabled */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
- c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
- if (IS_ENABLED(CONFIG_X86_32))
- clear_cpu_cap(c, X86_FEATURE_SME);
- } else {
- clear_cpu_cap(c, X86_FEATURE_SME);
- }
- }
+ early_detect_mem_encrypt(c);
}
static void init_amd_k8(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 213e8c2ca702..97685a0c3175 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -247,9 +247,9 @@ static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
if (READ_ONCE(mcelog.next))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 85eb5fc180c8..9340f41ce8d3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -251,6 +251,12 @@ static void __init ms_hyperv_init_platform(void)
hyperv_setup_mmu_ops();
/* Setup the IDT for hypervisor callback */
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+
+ /* Setup the IDT for reenlightenment notifications */
+ if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
+ alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
+ hyperv_reenlightenment_vector);
+
#endif
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 4075d2be5357..772c219b6889 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -30,6 +30,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
+ { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 68e1867cca80..45fb4d2565f8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -142,6 +142,15 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Hypervisor callback interrupts\n");
}
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
+ seq_printf(p, "%*s: ", prec, "HRE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->irq_hv_reenlightenment_count);
+ seq_puts(p, " Hyper-V reenlightenment interrupts\n");
+ }
+#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b40ffbf156c1..4e37d1a851a6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -498,6 +498,34 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
+static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
+
+static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+{
+ u8 state;
+ int cpu;
+ struct kvm_steal_time *src;
+ struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+
+ cpumask_copy(flushmask, cpumask);
+ /*
+ * We have to call flush only on online vCPUs. And
+ * queue flush_on_enter for pre-empted vCPUs
+ */
+ for_each_cpu(cpu, flushmask) {
+ src = &per_cpu(steal_time, cpu);
+ state = READ_ONCE(src->preempted);
+ if ((state & KVM_VCPU_PREEMPTED)) {
+ if (try_cmpxchg(&src->preempted, &state,
+ state | KVM_VCPU_FLUSH_TLB))
+ __cpumask_clear_cpu(cpu, flushmask);
+ }
+ }
+
+ native_flush_tlb_others(flushmask, info);
+}
+
static void __init kvm_guest_init(void)
{
int i;
@@ -517,6 +545,9 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
+
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
@@ -598,6 +629,22 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
+static __init int kvm_setup_pv_tlb_flush(void)
+{
+ int cpu;
+
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ for_each_possible_cpu(cpu) {
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+ }
+ pr_info("KVM setup pv remote TLB flush\n");
+ }
+
+ return 0;
+}
+arch_initcall(kvm_setup_pv_tlb_flush);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
@@ -643,7 +690,7 @@ __visible bool __kvm_vcpu_is_preempted(long cpu)
{
struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
- return !!src->preempted;
+ return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3df51c287844..92fd433c50b9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -81,6 +81,14 @@ config KVM_AMD
To compile this as a module, choose M here: the module
will be called kvm-amd.
+config KVM_AMD_SEV
+ def_bool y
+ bool "AMD Secure Encrypted Virtualization (SEV) support"
+ depends on KVM_AMD && X86_64
+ depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP
+ ---help---
+ Provides support for launching Encrypted VMs on AMD processors.
+
config KVM_MMU_AUDIT
bool "Audit KVM MMU"
depends on KVM && TRACEPOINTS
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 13f5d4217e4f..a0c5a69bc7c4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
{
switch (func) {
case 0:
- entry->eax = 1; /* only one leaf currently */
+ entry->eax = 7;
++*nent;
break;
case 1:
entry->ecx = F(MOVBE);
++*nent;
break;
+ case 7:
+ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ if (index == 0)
+ entry->ecx = F(RDPID);
+ ++*nent;
default:
break;
}
@@ -325,6 +330,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
+ unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@@ -363,7 +369,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
- 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+ F(TOPOEXT);
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
@@ -389,8 +396,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) |
- 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -476,6 +484,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ entry->ecx |= f_umip;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
@@ -597,7 +606,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
- (1 << KVM_FEATURE_PV_UNHALT);
+ (1 << KVM_FEATURE_PV_UNHALT) |
+ (1 << KVM_FEATURE_PV_TLB_FLUSH);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -607,7 +617,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = 0;
break;
case 0x80000000:
- entry->eax = min(entry->eax, 0x8000001a);
+ entry->eax = min(entry->eax, 0x8000001f);
break;
case 0x80000001:
entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 290ecf711aec..d91eaeb01034 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3533,6 +3533,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_rdpid(struct x86_emulate_ctxt *ctxt)
+{
+ u64 tsc_aux = 0;
+
+ if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
+ return emulate_gp(ctxt, 0);
+ ctxt->dst.val = tsc_aux;
+ return X86EMUL_CONTINUE;
+}
+
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
u64 tsc = 0;
@@ -3652,17 +3662,27 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
+static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
{
- if (ctxt->modrm_reg > VCPU_SREG_GS)
- return emulate_ud(ctxt);
+ if (segment > VCPU_SREG_GS &&
+ (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
- ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+ ctxt->dst.val = get_segment_selector(ctxt, segment);
if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
ctxt->dst.bytes = 2;
return X86EMUL_CONTINUE;
}
+static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
+{
+ if (ctxt->modrm_reg > VCPU_SREG_GS)
+ return emulate_ud(ctxt);
+
+ return em_store_sreg(ctxt, ctxt->modrm_reg);
+}
+
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3678,6 +3698,11 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}
+static int em_sldt(struct x86_emulate_ctxt *ctxt)
+{
+ return em_store_sreg(ctxt, VCPU_SREG_LDTR);
+}
+
static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3687,6 +3712,11 @@ static int em_lldt(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}
+static int em_str(struct x86_emulate_ctxt *ctxt)
+{
+ return em_store_sreg(ctxt, VCPU_SREG_TR);
+}
+
static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3739,6 +3769,10 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
{
struct desc_ptr desc_ptr;
+ if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
+
if (ctxt->mode == X86EMUL_MODE_PROT64)
ctxt->op_bytes = 8;
get(ctxt, &desc_ptr);
@@ -3798,6 +3832,10 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
+ if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
+
if (ctxt->dst.type == OP_MEM)
ctxt->dst.bytes = 2;
ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
@@ -4383,8 +4421,8 @@ static const struct opcode group5[] = {
};
static const struct opcode group6[] = {
- DI(Prot | DstMem, sldt),
- DI(Prot | DstMem, str),
+ II(Prot | DstMem, em_sldt, sldt),
+ II(Prot | DstMem, em_str, str),
II(Prot | Priv | SrcMem16, em_lldt, lldt),
II(Prot | Priv | SrcMem16, em_ltr, ltr),
N, N, N, N,
@@ -4415,10 +4453,20 @@ static const struct opcode group8[] = {
F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
+/*
+ * The "memory" destination is actually always a register, since we come
+ * from the register case of group9.
+ */
+static const struct gprefix pfx_0f_c7_7 = {
+ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+};
+
+
static const struct group_dual group9 = { {
N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
- N, N, N, N, N, N, N, N,
+ N, N, N, N, N, N, N,
+ GP(0, &pfx_0f_c7_7),
} };
static const struct opcode group11[] = {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 5c24811e8b0b..f171051eecf3 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -79,7 +79,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
if (kvm_cpu_has_extint(v))
return 1;
- if (kvm_vcpu_apicv_active(v))
+ if (!is_guest_mode(v) && kvm_vcpu_apicv_active(v))
return 0;
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e2c1fb8d35ce..924ac8ce9d50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -364,32 +364,41 @@ static u8 count_vectors(void *bitmap)
return count;
}
-int __kvm_apic_update_irr(u32 *pir, void *regs)
+bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
u32 i, vec;
- u32 pir_val, irr_val;
- int max_irr = -1;
+ u32 pir_val, irr_val, prev_irr_val;
+ int max_updated_irr;
+
+ max_updated_irr = -1;
+ *max_irr = -1;
for (i = vec = 0; i <= 7; i++, vec += 32) {
pir_val = READ_ONCE(pir[i]);
irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
if (pir_val) {
+ prev_irr_val = irr_val;
irr_val |= xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
+ if (prev_irr_val != irr_val) {
+ max_updated_irr =
+ __fls(irr_val ^ prev_irr_val) + vec;
+ }
}
if (irr_val)
- max_irr = __fls(irr_val) + vec;
+ *max_irr = __fls(irr_val) + vec;
}
- return max_irr;
+ return ((max_updated_irr != -1) &&
+ (max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
-int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- return __kvm_apic_update_irr(pir, apic->regs);
+ return __kvm_apic_update_irr(pir, apic->regs, max_irr);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
@@ -581,7 +590,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
- if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
+ if (apic->vcpu->arch.apicv_active)
highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4b9935a38347..56c36014f7b7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -75,8 +75,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
-int __kvm_apic_update_irr(u32 *pir, void *regs);
-int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
+bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr);
+bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr);
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2b8eb4da4d08..8eca1d04aeb8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -42,6 +42,7 @@
#include <linux/kern_levels.h>
#include <asm/page.h>
+#include <asm/pat.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/vmx.h>
@@ -381,7 +382,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
-void kvm_mmu_clear_all_pte_masks(void)
+static void kvm_mmu_clear_all_pte_masks(void)
{
shadow_user_mask = 0;
shadow_accessed_mask = 0;
@@ -2708,7 +2709,18 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
{
if (pfn_valid(pfn))
- return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
+ /*
+ * Some reserved pages, such as those from NVDIMM
+ * DAX devices, are not for MMIO, and can be mapped
+ * with cached memory type for better performance.
+ * However, the above check misconceives those pages
+ * as MMIO, and results in KVM mapping them with UC
+ * memory type, which would hurt the performance.
+ * Therefore, we check the host memory type in addition
+ * and only treat UC/UC-/WC pages as MMIO.
+ */
+ (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
return true;
}
@@ -4951,6 +4963,16 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
emulate:
+ /*
+ * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+ * This can happen if a guest gets a page-fault on data access but the HW
+ * table walker is not able to read the instruction page (e.g instruction
+ * page is not present in memory). In those cases we simply restart the
+ * guest.
+ */
+ if (unlikely(insn && !insn_len))
+ return 1;
+
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
switch (er) {
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index d22ddbdf5e6e..1272861e77b9 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -19,7 +19,7 @@
#include <linux/ratelimit.h>
-char const *audit_point_name[] = {
+static char const *audit_point_name[] = {
"pre page fault",
"post page fault",
"pre pte write",
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4e3c79530526..b3e488a74828 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -37,6 +37,10 @@
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
+#include <linux/psp-sev.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -214,6 +218,9 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
+
+ /* which host CPU was used for running this vcpu */
+ unsigned int last_cpu;
};
/*
@@ -289,8 +296,12 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
+/* enable/disable SEV support */
+static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev, int, 0444);
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
static int nested_svm_exit_handled(struct vcpu_svm *svm);
@@ -324,6 +335,38 @@ enum {
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+static unsigned int max_sev_asid;
+static unsigned int min_sev_asid;
+static unsigned long *sev_asid_bitmap;
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+struct enc_region {
+ struct list_head list;
+ unsigned long npages;
+ struct page **pages;
+ unsigned long uaddr;
+ unsigned long size;
+};
+
+static inline bool svm_sev_enabled(void)
+{
+ return max_sev_asid;
+}
+
+static inline bool sev_guest(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return sev->active;
+}
+
+static inline int sev_get_asid(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return sev->asid;
+}
+
static inline void mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
@@ -530,10 +573,14 @@ struct svm_cpu_data {
u64 asid_generation;
u32 max_asid;
u32 next_asid;
+ u32 min_asid;
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
struct vmcb *current_vmcb;
+
+ /* index = sev_asid, value = vmcb pointer */
+ struct vmcb **sev_vmcbs;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -788,6 +835,7 @@ static int svm_hardware_enable(void)
sd->asid_generation = 1;
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
+ sd->min_asid = max_sev_asid + 1;
gdt = get_current_gdt_rw();
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
@@ -846,6 +894,7 @@ static void svm_cpu_uninit(int cpu)
return;
per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+ kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
kfree(sd);
}
@@ -859,11 +908,18 @@ static int svm_cpu_init(int cpu)
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- sd->save_area = alloc_page(GFP_KERNEL);
r = -ENOMEM;
+ sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto err_1;
+ if (svm_sev_enabled()) {
+ r = -ENOMEM;
+ sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
+ if (!sd->sev_vmcbs)
+ goto err_1;
+ }
+
per_cpu(svm_data, cpu) = sd;
return 0;
@@ -1070,6 +1126,48 @@ static int avic_ga_log_notifier(u32 ga_tag)
return 0;
}
+static __init int sev_hardware_setup(void)
+{
+ struct sev_user_data_status *status;
+ int rc;
+
+ /* Maximum number of encrypted guests supported simultaneously */
+ max_sev_asid = cpuid_ecx(0x8000001F);
+
+ if (!max_sev_asid)
+ return 1;
+
+ /* Minimum ASID value that should be used for SEV guest */
+ min_sev_asid = cpuid_edx(0x8000001F);
+
+ /* Initialize SEV ASID bitmap */
+ sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!sev_asid_bitmap)
+ return 1;
+
+ status = kmalloc(sizeof(*status), GFP_KERNEL);
+ if (!status)
+ return 1;
+
+ /*
+ * Check SEV platform status.
+ *
+ * PLATFORM_STATUS can be called in any state, if we failed to query
+ * the PLATFORM status then either PSP firmware does not support SEV
+ * feature or SEV firmware is dead.
+ */
+ rc = sev_platform_status(status, NULL);
+ if (rc)
+ goto err;
+
+ pr_info("SEV supported\n");
+
+err:
+ kfree(status);
+ return rc;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1105,6 +1203,17 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
+ if (sev) {
+ if (boot_cpu_has(X86_FEATURE_SEV) &&
+ IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
+ r = sev_hardware_setup();
+ if (r)
+ sev = false;
+ } else {
+ sev = false;
+ }
+ }
+
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -1166,6 +1275,9 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
+ if (svm_sev_enabled())
+ kfree(sev_asid_bitmap);
+
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
@@ -1318,7 +1430,7 @@ static void init_vmcb(struct vcpu_svm *svm)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
- control->nested_ctl = 1;
+ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
@@ -1356,6 +1468,11 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
+ if (sev_guest(svm->vcpu.kvm)) {
+ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+ clr_exception_intercept(svm, UD_VECTOR);
+ }
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -1438,6 +1555,179 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+static void __sev_asid_free(int asid)
+{
+ struct svm_cpu_data *sd;
+ int cpu, pos;
+
+ pos = asid - 1;
+ clear_bit(pos, sev_asid_bitmap);
+
+ for_each_possible_cpu(cpu) {
+ sd = per_cpu(svm_data, cpu);
+ sd->sev_vmcbs[pos] = NULL;
+ }
+}
+
+static void sev_asid_free(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ __sev_asid_free(sev->asid);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
+ struct sev_data_decommission *decommission;
+ struct sev_data_deactivate *data;
+
+ if (!handle)
+ return;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ /* deactivate handle */
+ data->handle = handle;
+ sev_guest_deactivate(data, NULL);
+
+ wbinvd_on_all_cpus();
+ sev_guest_df_flush(NULL);
+ kfree(data);
+
+ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+ if (!decommission)
+ return;
+
+ /* decommission handle */
+ decommission->handle = handle;
+ sev_guest_decommission(decommission, NULL);
+
+ kfree(decommission);
+}
+
+static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
+ unsigned long ulen, unsigned long *n,
+ int write)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ unsigned long npages, npinned, size;
+ unsigned long locked, lock_limit;
+ struct page **pages;
+ int first, last;
+
+ /* Calculate number of pages. */
+ first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
+ last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
+ npages = (last - first + 1);
+
+ locked = sev->pages_locked + npages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
+ return NULL;
+ }
+
+ /* Avoid using vmalloc for smaller buffers. */
+ size = npages * sizeof(struct page *);
+ if (size > PAGE_SIZE)
+ pages = vmalloc(size);
+ else
+ pages = kmalloc(size, GFP_KERNEL);
+
+ if (!pages)
+ return NULL;
+
+ /* Pin the user virtual address. */
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+ if (npinned != npages) {
+ pr_err("SEV: Failure locking %lu pages.\n", npages);
+ goto err;
+ }
+
+ *n = npages;
+ sev->pages_locked = locked;
+
+ return pages;
+
+err:
+ if (npinned > 0)
+ release_pages(pages, npinned);
+
+ kvfree(pages);
+ return NULL;
+}
+
+static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
+ unsigned long npages)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ release_pages(pages, npages);
+ kvfree(pages);
+ sev->pages_locked -= npages;
+}
+
+static void sev_clflush_pages(struct page *pages[], unsigned long npages)
+{
+ uint8_t *page_virtual;
+ unsigned long i;
+
+ if (npages == 0 || pages == NULL)
+ return;
+
+ for (i = 0; i < npages; i++) {
+ page_virtual = kmap_atomic(pages[i]);
+ clflush_cache_range(page_virtual, PAGE_SIZE);
+ kunmap_atomic(page_virtual);
+ }
+}
+
+static void __unregister_enc_region_locked(struct kvm *kvm,
+ struct enc_region *region)
+{
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+ * or vice versa for this memory range. Lets make sure caches are
+ * flushed to ensure that guest data gets written into memory with
+ * correct C-bit.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
+ sev_unpin_memory(kvm, region->pages, region->npages);
+ list_del(&region->list);
+ kfree(region);
+}
+
+static void sev_vm_destroy(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct list_head *pos, *q;
+
+ if (!sev_guest(kvm))
+ return;
+
+ mutex_lock(&kvm->lock);
+
+ /*
+ * if userspace was terminated before unregistering the memory regions
+ * then lets unpin all the registered memory.
+ */
+ if (!list_empty(head)) {
+ list_for_each_safe(pos, q, head) {
+ __unregister_enc_region_locked(kvm,
+ list_entry(pos, struct enc_region, list));
+ }
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ sev_unbind_asid(kvm, sev->handle);
+ sev_asid_free(kvm);
+}
+
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -1456,6 +1746,12 @@ static void avic_vm_destroy(struct kvm *kvm)
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
+static void svm_vm_destroy(struct kvm *kvm)
+{
+ avic_vm_destroy(kvm);
+ sev_vm_destroy(kvm);
+}
+
static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
@@ -2066,7 +2362,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -2125,7 +2421,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
{
if (sd->next_asid > sd->max_asid) {
++sd->asid_generation;
- sd->next_asid = 1;
+ sd->next_asid = sd->min_asid;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
@@ -2173,22 +2469,24 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
static int pf_interception(struct vcpu_svm *svm)
{
- u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
- svm->vmcb->control.insn_bytes,
+ static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+ svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
static int npf_interception(struct vcpu_svm *svm)
{
- u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
- svm->vmcb->control.insn_bytes,
+ static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+ svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
@@ -2415,7 +2713,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -2957,7 +3255,8 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
if (vmcb->control.asid == 0)
return false;
- if (vmcb->control.nested_ctl && !npt_enabled)
+ if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+ !npt_enabled)
return false;
return true;
@@ -2971,7 +3270,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
- if (nested_vmcb->control.nested_ctl) {
+ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
kvm_mmu_unload(&svm->vcpu);
svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
nested_svm_init_mmu_context(&svm->vcpu);
@@ -3019,7 +3318,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
svm->nested.intercept = nested_vmcb->control.intercept;
- svm_flush_tlb(&svm->vcpu);
+ svm_flush_tlb(&svm->vcpu, true);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@ -4442,12 +4741,39 @@ static void reload_tss(struct kvm_vcpu *vcpu)
load_TR_desc();
}
+static void pre_sev_run(struct vcpu_svm *svm, int cpu)
+{
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ int asid = sev_get_asid(svm->vcpu.kvm);
+
+ /* Assign the asid allocated with this SEV guest */
+ svm->vmcb->control.asid = asid;
+
+ /*
+ * Flush guest TLB:
+ *
+ * 1) when different VMCB for the same ASID is to be run on the same host CPU.
+ * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
+ */
+ if (sd->sev_vmcbs[asid] == svm->vmcb &&
+ svm->last_cpu == cpu)
+ return;
+
+ svm->last_cpu = cpu;
+ sd->sev_vmcbs[asid] = svm->vmcb;
+ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+ mark_dirty(svm->vmcb, VMCB_ASID);
+}
+
static void pre_svm_run(struct vcpu_svm *svm)
{
int cpu = raw_smp_processor_id();
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ if (sev_guest(svm->vcpu.kvm))
+ return pre_sev_run(svm, cpu);
+
/* FIXME: handle wraparound of asid_generation */
if (svm->asid_generation != sd->asid_generation)
new_asid(svm, sd);
@@ -4865,7 +5191,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void svm_flush_tlb(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5208,7 +5534,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5222,7 +5548,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static int is_disabled(void)
@@ -5308,6 +5634,12 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
entry->edx |= SVM_FEATURE_NPT;
break;
+ case 0x8000001F:
+ /* Support memory encryption cpuid if host supports it */
+ if (boot_cpu_has(X86_FEATURE_SEV))
+ cpuid(0x8000001f, &entry->eax, &entry->ebx,
+ &entry->ecx, &entry->edx);
+
}
}
@@ -5336,6 +5668,11 @@ static bool svm_xsaves_supported(void)
return false;
}
+static bool svm_umip_emulated(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -5637,6 +5974,828 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static int sev_asid_new(void)
+{
+ int pos;
+
+ /*
+ * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+ */
+ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
+ if (pos >= max_sev_asid)
+ return -EBUSY;
+
+ set_bit(pos, sev_asid_bitmap);
+ return pos + 1;
+}
+
+static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ int asid, ret;
+
+ ret = -EBUSY;
+ asid = sev_asid_new();
+ if (asid < 0)
+ return ret;
+
+ ret = sev_platform_init(&argp->error);
+ if (ret)
+ goto e_free;
+
+ sev->active = true;
+ sev->asid = asid;
+ INIT_LIST_HEAD(&sev->regions_list);
+
+ return 0;
+
+e_free:
+ __sev_asid_free(asid);
+ return ret;
+}
+
+static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+{
+ struct sev_data_activate *data;
+ int asid = sev_get_asid(kvm);
+ int ret;
+
+ wbinvd_on_all_cpus();
+
+ ret = sev_guest_df_flush(error);
+ if (ret)
+ return ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* activate ASID on the given handle */
+ data->handle = handle;
+ data->asid = asid;
+ ret = sev_guest_activate(data, error);
+ kfree(data);
+
+ return ret;
+}
+
+static int __sev_issue_cmd(int fd, int id, void *data, int *error)
+{
+ struct fd f;
+ int ret;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ ret = sev_issue_cmd_external_user(f.file, id, data, error);
+
+ fdput(f);
+ return ret;
+}
+
+static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return __sev_issue_cmd(sev->fd, id, data, error);
+}
+
+static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_start *start;
+ struct kvm_sev_launch_start params;
+ void *dh_blob, *session_blob;
+ int *error = &argp->error;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ start = kzalloc(sizeof(*start), GFP_KERNEL);
+ if (!start)
+ return -ENOMEM;
+
+ dh_blob = NULL;
+ if (params.dh_uaddr) {
+ dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
+ if (IS_ERR(dh_blob)) {
+ ret = PTR_ERR(dh_blob);
+ goto e_free;
+ }
+
+ start->dh_cert_address = __sme_set(__pa(dh_blob));
+ start->dh_cert_len = params.dh_len;
+ }
+
+ session_blob = NULL;
+ if (params.session_uaddr) {
+ session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
+ if (IS_ERR(session_blob)) {
+ ret = PTR_ERR(session_blob);
+ goto e_free_dh;
+ }
+
+ start->session_address = __sme_set(__pa(session_blob));
+ start->session_len = params.session_len;
+ }
+
+ start->handle = params.handle;
+ start->policy = params.policy;
+
+ /* create memory encryption context */
+ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+ if (ret)
+ goto e_free_session;
+
+ /* Bind ASID to this guest */
+ ret = sev_bind_asid(kvm, start->handle, error);
+ if (ret)
+ goto e_free_session;
+
+ /* return handle to userspace */
+ params.handle = start->handle;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
+ sev_unbind_asid(kvm, start->handle);
+ ret = -EFAULT;
+ goto e_free_session;
+ }
+
+ sev->handle = start->handle;
+ sev->fd = argp->sev_fd;
+
+e_free_session:
+ kfree(session_blob);
+e_free_dh:
+ kfree(dh_blob);
+e_free:
+ kfree(start);
+ return ret;
+}
+
+static int get_num_contig_pages(int idx, struct page **inpages,
+ unsigned long npages)
+{
+ unsigned long paddr, next_paddr;
+ int i = idx + 1, pages = 1;
+
+ /* find the number of contiguous pages starting from idx */
+ paddr = __sme_page_pa(inpages[idx]);
+ while (i < npages) {
+ next_paddr = __sme_page_pa(inpages[i++]);
+ if ((paddr + PAGE_SIZE) == next_paddr) {
+ pages++;
+ paddr = next_paddr;
+ continue;
+ }
+ break;
+ }
+
+ return pages;
+}
+
+static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_launch_update_data params;
+ struct sev_data_launch_update_data *data;
+ struct page **inpages;
+ int i, ret, pages;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ vaddr = params.uaddr;
+ size = params.len;
+ vaddr_end = vaddr + size;
+
+ /* Lock the user memory. */
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ if (!inpages) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ /*
+ * The LAUNCH_UPDATE command will perform in-place encryption of the
+ * memory content (i.e it will write the same memory region with C=1).
+ * It's possible that the cache may contain the data with C=0, i.e.,
+ * unencrypted so invalidate it first.
+ */
+ sev_clflush_pages(inpages, npages);
+
+ for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
+ int offset, len;
+
+ /*
+ * If the user buffer is not page-aligned, calculate the offset
+ * within the page.
+ */
+ offset = vaddr & (PAGE_SIZE - 1);
+
+ /* Calculate the number of pages that can be encrypted in one go. */
+ pages = get_num_contig_pages(i, inpages, npages);
+
+ len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
+
+ data->handle = sev->handle;
+ data->len = len;
+ data->address = __sme_page_pa(inpages[i]) + offset;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+ if (ret)
+ goto e_unpin;
+
+ size -= len;
+ next_vaddr = vaddr + len;
+ }
+
+e_unpin:
+ /* content of memory is updated, mark pages dirty */
+ for (i = 0; i < npages; i++) {
+ set_page_dirty_lock(inpages[i]);
+ mark_page_accessed(inpages[i]);
+ }
+ /* unlock the user pages */
+ sev_unpin_memory(kvm, inpages, npages);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_measure *data;
+ struct kvm_sev_launch_measure params;
+ void *blob = NULL;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* User wants to query the blob length */
+ if (!params.len)
+ goto cmd;
+
+ if (params.uaddr) {
+ if (params.len > SEV_FW_BLOB_MAX_SIZE) {
+ ret = -EINVAL;
+ goto e_free;
+ }
+
+ if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+
+ ret = -ENOMEM;
+ blob = kmalloc(params.len, GFP_KERNEL);
+ if (!blob)
+ goto e_free;
+
+ data->address = __psp_pa(blob);
+ data->len = params.len;
+ }
+
+cmd:
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+
+ /*
+ * If we query the session length, FW responded with expected data.
+ */
+ if (!params.len)
+ goto done;
+
+ if (ret)
+ goto e_free_blob;
+
+ if (blob) {
+ if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ ret = -EFAULT;
+ }
+
+done:
+ params.len = data->len;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ ret = -EFAULT;
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_finish *data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
+
+ kfree(data);
+ return ret;
+}
+
+static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_guest_status params;
+ struct sev_data_guest_status *data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
+ if (ret)
+ goto e_free;
+
+ params.policy = data->policy;
+ params.state = data->state;
+ params.handle = data->handle;
+
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ ret = -EFAULT;
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
+ unsigned long dst, int size,
+ int *error, bool enc)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_dbg *data;
+ int ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ data->dst_addr = dst;
+ data->src_addr = src;
+ data->len = size;
+
+ ret = sev_issue_cmd(kvm,
+ enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
+ data, error);
+ kfree(data);
+ return ret;
+}
+
+static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
+ unsigned long dst_paddr, int sz, int *err)
+{
+ int offset;
+
+ /*
+ * Its safe to read more than we are asked, caller should ensure that
+ * destination has enough space.
+ */
+ src_paddr = round_down(src_paddr, 16);
+ offset = src_paddr & 15;
+ sz = round_up(sz + offset, 16);
+
+ return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
+}
+
+static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
+ unsigned long __user dst_uaddr,
+ unsigned long dst_paddr,
+ int size, int *err)
+{
+ struct page *tpage = NULL;
+ int ret, offset;
+
+ /* if inputs are not 16-byte then use intermediate buffer */
+ if (!IS_ALIGNED(dst_paddr, 16) ||
+ !IS_ALIGNED(paddr, 16) ||
+ !IS_ALIGNED(size, 16)) {
+ tpage = (void *)alloc_page(GFP_KERNEL);
+ if (!tpage)
+ return -ENOMEM;
+
+ dst_paddr = __sme_page_pa(tpage);
+ }
+
+ ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
+ if (ret)
+ goto e_free;
+
+ if (tpage) {
+ offset = paddr & 15;
+ if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
+ page_address(tpage) + offset, size))
+ ret = -EFAULT;
+ }
+
+e_free:
+ if (tpage)
+ __free_page(tpage);
+
+ return ret;
+}
+
+static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ unsigned long __user vaddr,
+ unsigned long dst_paddr,
+ unsigned long __user dst_vaddr,
+ int size, int *error)
+{
+ struct page *src_tpage = NULL;
+ struct page *dst_tpage = NULL;
+ int ret, len = size;
+
+ /* If source buffer is not aligned then use an intermediate buffer */
+ if (!IS_ALIGNED(vaddr, 16)) {
+ src_tpage = alloc_page(GFP_KERNEL);
+ if (!src_tpage)
+ return -ENOMEM;
+
+ if (copy_from_user(page_address(src_tpage),
+ (void __user *)(uintptr_t)vaddr, size)) {
+ __free_page(src_tpage);
+ return -EFAULT;
+ }
+
+ paddr = __sme_page_pa(src_tpage);
+ }
+
+ /*
+ * If destination buffer or length is not aligned then do read-modify-write:
+ * - decrypt destination in an intermediate buffer
+ * - copy the source buffer in an intermediate buffer
+ * - use the intermediate buffer as source buffer
+ */
+ if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+ dst_tpage = alloc_page(GFP_KERNEL);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ ret = __sev_dbg_decrypt(kvm, dst_paddr,
+ __sme_page_pa(dst_tpage), size, error);
+ if (ret)
+ goto e_free;
+
+ /*
+ * If source is kernel buffer then use memcpy() otherwise
+ * copy_from_user().
+ */
+ dst_offset = dst_paddr & 15;
+
+ if (src_tpage)
+ memcpy(page_address(dst_tpage) + dst_offset,
+ page_address(src_tpage), size);
+ else {
+ if (copy_from_user(page_address(dst_tpage) + dst_offset,
+ (void __user *)(uintptr_t)vaddr, size)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+ }
+
+ paddr = __sme_page_pa(dst_tpage);
+ dst_paddr = round_down(dst_paddr, 16);
+ len = round_up(size, 16);
+ }
+
+ ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
+
+e_free:
+ if (src_tpage)
+ __free_page(src_tpage);
+ if (dst_tpage)
+ __free_page(dst_tpage);
+ return ret;
+}
+
+static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
+{
+ unsigned long vaddr, vaddr_end, next_vaddr;
+ unsigned long dst_vaddr, dst_vaddr_end;
+ struct page **src_p, **dst_p;
+ struct kvm_sev_dbg debug;
+ unsigned long n;
+ int ret, size;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
+ return -EFAULT;
+
+ vaddr = debug.src_uaddr;
+ size = debug.len;
+ vaddr_end = vaddr + size;
+ dst_vaddr = debug.dst_uaddr;
+ dst_vaddr_end = dst_vaddr + size;
+
+ for (; vaddr < vaddr_end; vaddr = next_vaddr) {
+ int len, s_off, d_off;
+
+ /* lock userspace source and destination page */
+ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+ if (!src_p)
+ return -EFAULT;
+
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ if (!dst_p) {
+ sev_unpin_memory(kvm, src_p, n);
+ return -EFAULT;
+ }
+
+ /*
+ * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
+ * memory content (i.e it will write the same memory region with C=1).
+ * It's possible that the cache may contain the data with C=0, i.e.,
+ * unencrypted so invalidate it first.
+ */
+ sev_clflush_pages(src_p, 1);
+ sev_clflush_pages(dst_p, 1);
+
+ /*
+ * Since user buffer may not be page aligned, calculate the
+ * offset within the page.
+ */
+ s_off = vaddr & ~PAGE_MASK;
+ d_off = dst_vaddr & ~PAGE_MASK;
+ len = min_t(size_t, (PAGE_SIZE - s_off), size);
+
+ if (dec)
+ ret = __sev_dbg_decrypt_user(kvm,
+ __sme_page_pa(src_p[0]) + s_off,
+ dst_vaddr,
+ __sme_page_pa(dst_p[0]) + d_off,
+ len, &argp->error);
+ else
+ ret = __sev_dbg_encrypt_user(kvm,
+ __sme_page_pa(src_p[0]) + s_off,
+ vaddr,
+ __sme_page_pa(dst_p[0]) + d_off,
+ dst_vaddr,
+ len, &argp->error);
+
+ sev_unpin_memory(kvm, src_p, 1);
+ sev_unpin_memory(kvm, dst_p, 1);
+
+ if (ret)
+ goto err;
+
+ next_vaddr = vaddr + len;
+ dst_vaddr = dst_vaddr + len;
+ size -= len;
+ }
+err:
+ return ret;
+}
+
+static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_secret *data;
+ struct kvm_sev_launch_secret params;
+ struct page **pages;
+ void *blob, *hdr;
+ unsigned long n;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ if (!pages)
+ return -ENOMEM;
+
+ /*
+ * The secret must be copied into contiguous memory region, lets verify
+ * that userspace memory pages are contiguous before we issue command.
+ */
+ if (get_num_contig_pages(0, pages, n) != n) {
+ ret = -EINVAL;
+ goto e_unpin_memory;
+ }
+
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto e_unpin_memory;
+
+ blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+ if (IS_ERR(blob)) {
+ ret = PTR_ERR(blob);
+ goto e_free;
+ }
+
+ data->trans_address = __psp_pa(blob);
+ data->trans_len = params.trans_len;
+
+ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+ if (IS_ERR(hdr)) {
+ ret = PTR_ERR(hdr);
+ goto e_free_blob;
+ }
+ data->trans_address = __psp_pa(blob);
+ data->trans_len = params.trans_len;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+
+ kfree(hdr);
+
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+e_unpin_memory:
+ sev_unpin_memory(kvm, pages, n);
+ return ret;
+}
+
+static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_sev_cmd sev_cmd;
+ int r;
+
+ if (!svm_sev_enabled())
+ return -ENOTTY;
+
+ if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
+ return -EFAULT;
+
+ mutex_lock(&kvm->lock);
+
+ switch (sev_cmd.id) {
+ case KVM_SEV_INIT:
+ r = sev_guest_init(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_START:
+ r = sev_launch_start(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_UPDATE_DATA:
+ r = sev_launch_update_data(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_MEASURE:
+ r = sev_launch_measure(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_FINISH:
+ r = sev_launch_finish(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_GUEST_STATUS:
+ r = sev_guest_status(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_DBG_DECRYPT:
+ r = sev_dbg_crypt(kvm, &sev_cmd, true);
+ break;
+ case KVM_SEV_DBG_ENCRYPT:
+ r = sev_dbg_crypt(kvm, &sev_cmd, false);
+ break;
+ case KVM_SEV_LAUNCH_SECRET:
+ r = sev_launch_secret(kvm, &sev_cmd);
+ break;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
+ r = -EFAULT;
+
+out:
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+
+static int svm_register_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct enc_region *region;
+ int ret = 0;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+ if (!region->pages) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+ * or vice versa for this memory range. Lets make sure caches are
+ * flushed to ensure that guest data gets written into memory with
+ * correct C-bit.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
+ region->uaddr = range->addr;
+ region->size = range->size;
+
+ mutex_lock(&kvm->lock);
+ list_add_tail(&region->list, &sev->regions_list);
+ mutex_unlock(&kvm->lock);
+
+ return ret;
+
+e_free:
+ kfree(region);
+ return ret;
+}
+
+static struct enc_region *
+find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct enc_region *i;
+
+ list_for_each_entry(i, head, list) {
+ if (i->uaddr == range->addr &&
+ i->size == range->size)
+ return i;
+ }
+
+ return NULL;
+}
+
+
+static int svm_unregister_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range)
+{
+ struct enc_region *region;
+ int ret;
+
+ mutex_lock(&kvm->lock);
+
+ if (!sev_guest(kvm)) {
+ ret = -ENOTTY;
+ goto failed;
+ }
+
+ region = find_enc_region(kvm, range);
+ if (!region) {
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ __unregister_enc_region_locked(kvm, region);
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+
+failed:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5653,7 +6812,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_reset = svm_vcpu_reset,
.vm_init = avic_vm_init,
- .vm_destroy = avic_vm_destroy,
+ .vm_destroy = svm_vm_destroy,
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
@@ -5713,6 +6872,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
+ .sync_pir_to_irr = kvm_lapic_find_highest_irr,
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
@@ -5729,6 +6889,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.invpcid_supported = svm_invpcid_supported,
.mpx_supported = svm_mpx_supported,
.xsaves_supported = svm_xsaves_supported,
+ .umip_emulated = svm_umip_emulated,
.set_supported_cpuid = svm_set_supported_cpuid,
@@ -5752,6 +6913,10 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pre_enter_smm = svm_pre_enter_smm,
.pre_leave_smm = svm_pre_leave_smm,
.enable_smi_window = enable_smi_window,
+
+ .mem_enc_op = svm_mem_enc_op,
+ .mem_enc_reg_region = svm_register_enc_region,
+ .mem_enc_unreg_region = svm_unregister_enc_region,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bee4c49f6dd0..f427723dc7db 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -419,6 +419,12 @@ struct __packed vmcs12 {
#define VMCS12_SIZE 0x1000
/*
+ * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
+ * supported VMCS12 field encoding.
+ */
+#define VMCS12_MAX_FIELD_INDEX 0x17
+
+/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
*/
@@ -441,6 +447,7 @@ struct nested_vmx {
* data hold by vmcs12
*/
bool sync_shadow_vmcs;
+ bool dirty_vmcs12;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
@@ -664,6 +671,8 @@ struct vcpu_vmx {
u32 host_pkru;
+ unsigned long host_debugctlmsr;
+
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -692,67 +701,24 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
}
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
-#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
-#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
- [number##_HIGH] = VMCS12_OFFSET(name)+4
+#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
+#define FIELD64(number, name) \
+ FIELD(number, name), \
+ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
-static unsigned long shadow_read_only_fields[] = {
- /*
- * We do NOT shadow fields that are modified when L0
- * traps and emulates any vmx instruction (e.g. VMPTRLD,
- * VMXON...) executed by L1.
- * For example, VM_INSTRUCTION_ERROR is read
- * by L1 if a vmx instruction fails (part of the error path).
- * Note the code assumes this logic. If for some reason
- * we start shadowing these fields then we need to
- * force a shadow sync when L0 emulates vmx instructions
- * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
- * by nested_vmx_failValid)
- */
- VM_EXIT_REASON,
- VM_EXIT_INTR_INFO,
- VM_EXIT_INSTRUCTION_LEN,
- IDT_VECTORING_INFO_FIELD,
- IDT_VECTORING_ERROR_CODE,
- VM_EXIT_INTR_ERROR_CODE,
- EXIT_QUALIFICATION,
- GUEST_LINEAR_ADDRESS,
- GUEST_PHYSICAL_ADDRESS
+static u16 shadow_read_only_fields[] = {
+#define SHADOW_FIELD_RO(x) x,
+#include "vmx_shadow_fields.h"
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static unsigned long shadow_read_write_fields[] = {
- TPR_THRESHOLD,
- GUEST_RIP,
- GUEST_RSP,
- GUEST_CR0,
- GUEST_CR3,
- GUEST_CR4,
- GUEST_INTERRUPTIBILITY_INFO,
- GUEST_RFLAGS,
- GUEST_CS_SELECTOR,
- GUEST_CS_AR_BYTES,
- GUEST_CS_LIMIT,
- GUEST_CS_BASE,
- GUEST_ES_BASE,
- GUEST_BNDCFGS,
- CR0_GUEST_HOST_MASK,
- CR0_READ_SHADOW,
- CR4_READ_SHADOW,
- TSC_OFFSET,
- EXCEPTION_BITMAP,
- CPU_BASED_VM_EXEC_CONTROL,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- VM_ENTRY_INTR_INFO_FIELD,
- VM_ENTRY_INSTRUCTION_LEN,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- HOST_FS_BASE,
- HOST_GS_BASE,
- HOST_FS_SELECTOR,
- HOST_GS_SELECTOR
+static u16 shadow_read_write_fields[] = {
+#define SHADOW_FIELD_RW(x) x,
+#include "vmx_shadow_fields.h"
};
static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
@@ -905,13 +871,17 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
unsigned short offset;
+ unsigned index;
+
+ if (field >> 15)
+ return -ENOENT;
- BUILD_BUG_ON(size > SHRT_MAX);
- if (field >= size)
+ index = ROL16(field, 6);
+ if (index >= size)
return -ENOENT;
- field = array_index_nospec(field, size);
- offset = vmcs_field_to_offset_table[field];
+ index = array_index_nospec(index, size);
+ offset = vmcs_field_to_offset_table[index];
if (offset == 0)
return -ENOENT;
return offset;
@@ -957,8 +927,6 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
enum {
- VMX_IO_BITMAP_A,
- VMX_IO_BITMAP_B,
VMX_VMREAD_BITMAP,
VMX_VMWRITE_BITMAP,
VMX_BITMAP_NR
@@ -966,8 +934,6 @@ enum {
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
-#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
-#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
@@ -2373,6 +2339,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
+ vmx->host_debugctlmsr = get_debugctlmsr();
}
static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -2930,7 +2897,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
/* highest index: VMX_PREEMPTION_TIMER_VALUE */
- vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+ vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
}
/*
@@ -3266,6 +3233,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
*/
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
switch (msr_info->index) {
@@ -3277,8 +3245,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(to_vmx(vcpu));
- msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ vmx_load_host_state(vmx);
+ msr_info->data = vmx->msr_guest_kernel_gs_base;
break;
#endif
case MSR_EFER:
@@ -3318,13 +3286,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_MCG_EXT_CTL:
if (!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
+ !(vmx->msr_ia32_feature_control &
FEATURE_CONTROL_LMCE))
return 1;
msr_info->data = vcpu->arch.mcg_ext_ctl;
break;
case MSR_IA32_FEATURE_CONTROL:
- msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+ msr_info->data = vmx->msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -3341,7 +3309,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+ msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
break;
@@ -3727,7 +3695,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#endif
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_INVLPG_EXITING |
@@ -3757,6 +3725,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -3982,17 +3951,17 @@ static void free_kvm_area(void)
}
}
-enum vmcs_field_type {
- VMCS_FIELD_TYPE_U16 = 0,
- VMCS_FIELD_TYPE_U64 = 1,
- VMCS_FIELD_TYPE_U32 = 2,
- VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+enum vmcs_field_width {
+ VMCS_FIELD_WIDTH_U16 = 0,
+ VMCS_FIELD_WIDTH_U64 = 1,
+ VMCS_FIELD_WIDTH_U32 = 2,
+ VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
};
-static inline int vmcs_field_type(unsigned long field)
+static inline int vmcs_field_width(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
- return VMCS_FIELD_TYPE_U32;
+ return VMCS_FIELD_WIDTH_U32;
return (field >> 13) & 0x3 ;
}
@@ -4005,43 +3974,66 @@ static void init_vmcs_shadow_fields(void)
{
int i, j;
- /* No checks for read only fields yet */
+ for (i = j = 0; i < max_shadow_read_only_fields; i++) {
+ u16 field = shadow_read_only_fields[i];
+ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+ (i + 1 == max_shadow_read_only_fields ||
+ shadow_read_only_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_only_field %x\n",
+ field + 1);
+
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
+ if (j < i)
+ shadow_read_only_fields[j] = field;
+ j++;
+ }
+ max_shadow_read_only_fields = j;
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
- switch (shadow_read_write_fields[i]) {
- case GUEST_BNDCFGS:
- if (!kvm_mpx_supported())
+ u16 field = shadow_read_write_fields[i];
+ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+ (i + 1 == max_shadow_read_write_fields ||
+ shadow_read_write_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_write_field %x\n",
+ field + 1);
+
+ /*
+ * PML and the preemption timer can be emulated, but the
+ * processor cannot vmwrite to fields that don't exist
+ * on bare metal.
+ */
+ switch (field) {
+ case GUEST_PML_INDEX:
+ if (!cpu_has_vmx_pml())
+ continue;
+ break;
+ case VMX_PREEMPTION_TIMER_VALUE:
+ if (!cpu_has_vmx_preemption_timer())
+ continue;
+ break;
+ case GUEST_INTR_STATUS:
+ if (!cpu_has_vmx_apicv())
continue;
break;
default:
break;
}
+ clear_bit(field, vmx_vmwrite_bitmap);
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
if (j < i)
- shadow_read_write_fields[j] =
- shadow_read_write_fields[i];
+ shadow_read_write_fields[j] = field;
j++;
}
max_shadow_read_write_fields = j;
-
- /* shadowed fields guest access without vmexit */
- for (i = 0; i < max_shadow_read_write_fields; i++) {
- unsigned long field = shadow_read_write_fields[i];
-
- clear_bit(field, vmx_vmwrite_bitmap);
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
- clear_bit(field + 1, vmx_vmwrite_bitmap);
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
- }
- for (i = 0; i < max_shadow_read_only_fields; i++) {
- unsigned long field = shadow_read_only_fields[i];
-
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
}
static __init int alloc_kvm_area(void)
@@ -4254,9 +4246,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
+ bool invalidate_gpa)
{
- if (enable_ept) {
+ if (enable_ept && (invalidate_gpa || !enable_vpid)) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
@@ -4265,15 +4258,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
}
}
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
}
static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
{
if (enable_ept)
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -4471,7 +4464,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
ept_load_pdptrs(vcpu);
}
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
vmcs_writel(GUEST_CR3, guest_cr3);
}
@@ -4488,6 +4481,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(to_vmx(vcpu)->rmode.vm86_active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+ hw_cr4 &= ~X86_CR4_UMIP;
+ } else
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+
if (cr4 & X86_CR4_VMXE) {
/*
* To use VMXON (and later other VMX instructions), a guest
@@ -5119,11 +5120,6 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
{
int f = sizeof(unsigned long);
- if (!cpu_has_vmx_msr_bitmap()) {
- WARN_ON(1);
- return;
- }
-
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
@@ -5263,7 +5259,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
if (max_irr != 256) {
vapic_page = kmap(vmx->nested.virtual_apic_page);
- __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
+ vapic_page, &max_irr);
kunmap(vmx->nested.virtual_apic_page);
status = vmcs_read16(GUEST_INTR_STATUS);
@@ -5323,14 +5320,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
- /* the PIR and ON have been set by L1. */
- kvm_vcpu_trigger_posted_interrupt(vcpu, true);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
*/
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /* the PIR and ON have been set by L1. */
+ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
+ kvm_vcpu_kick(vcpu);
return 0;
}
return -1;
@@ -5509,6 +5507,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
struct kvm_vcpu *vcpu = &vmx->vcpu;
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
+
if (!cpu_need_virtualize_apic_accesses(vcpu))
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0)
@@ -5527,6 +5526,11 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+
+ /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
+ * in vmx_set_cr4. */
+ exec_control &= ~SECONDARY_EXEC_DESC;
+
/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
(handle_vmptrld).
We can NOT enable shadow_vmcs here because we don't have yet
@@ -5646,10 +5650,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
#endif
int i;
- /* I/O */
- vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
- vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
-
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
@@ -6304,6 +6304,12 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
return kvm_set_cr4(vcpu, val);
}
+static int handle_desc(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+ return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+}
+
static int handle_cr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification, val;
@@ -6760,7 +6766,21 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu) &&
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
trace_kvm_fast_mmio(gpa);
- return kvm_skip_emulated_instruction(vcpu);
+ /*
+ * Doing kvm_skip_emulated_instruction() depends on undefined
+ * behavior: Intel's manual doesn't mandate
+ * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
+ * occurs and while on real hardware it was observed to be set,
+ * other hypervisors (namely Hyper-V) don't set it, we end up
+ * advancing IP with some random value. Disable fast mmio when
+ * running nested and keep it for real hardware in hope that
+ * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
+ */
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ return kvm_skip_emulated_instruction(vcpu);
+ else
+ return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
+ NULL, 0) == EMULATE_DONE;
}
ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -6957,10 +6977,6 @@ static __init int hardware_setup(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-
- memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
goto out;
@@ -6973,11 +6989,6 @@ static __init int hardware_setup(void)
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
- if (!cpu_has_vmx_shadow_vmcs())
- enable_shadow_vmcs = 0;
- if (enable_shadow_vmcs)
- init_vmcs_shadow_fields();
-
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels() ||
!cpu_has_vmx_ept_mt_wb() ||
@@ -7063,6 +7074,11 @@ static __init int hardware_setup(void)
kvm_x86_ops->cancel_hv_timer = NULL;
}
+ if (!cpu_has_vmx_shadow_vmcs())
+ enable_shadow_vmcs = 0;
+ if (enable_shadow_vmcs)
+ init_vmcs_shadow_fields();
+
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
kvm_mce_cap_supported |= MCG_LMCE_P;
@@ -7593,17 +7609,17 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
p = ((char *)(get_vmcs12(vcpu))) + offset;
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+ switch (vmcs_field_width(field)) {
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*ret = *((natural_width *)p);
return 0;
- case VMCS_FIELD_TYPE_U16:
+ case VMCS_FIELD_WIDTH_U16:
*ret = *((u16 *)p);
return 0;
- case VMCS_FIELD_TYPE_U32:
+ case VMCS_FIELD_WIDTH_U32:
*ret = *((u32 *)p);
return 0;
- case VMCS_FIELD_TYPE_U64:
+ case VMCS_FIELD_WIDTH_U64:
*ret = *((u64 *)p);
return 0;
default:
@@ -7620,17 +7636,17 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
if (offset < 0)
return offset;
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
+ switch (vmcs_field_width(field)) {
+ case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_U32:
+ case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_U64:
+ case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
return 0;
default:
@@ -7646,7 +7662,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
- const unsigned long *fields = shadow_read_write_fields;
+ const u16 *fields = shadow_read_write_fields;
const int num_fields = max_shadow_read_write_fields;
preempt_disable();
@@ -7655,23 +7671,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
for (i = 0; i < num_fields; i++) {
field = fields[i];
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- field_value = vmcs_read16(field);
- break;
- case VMCS_FIELD_TYPE_U32:
- field_value = vmcs_read32(field);
- break;
- case VMCS_FIELD_TYPE_U64:
- field_value = vmcs_read64(field);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- field_value = vmcs_readl(field);
- break;
- default:
- WARN_ON(1);
- continue;
- }
+ field_value = __vmcs_readl(field);
vmcs12_write_any(&vmx->vcpu, field, field_value);
}
@@ -7683,7 +7683,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const unsigned long *fields[] = {
+ const u16 *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -7702,24 +7702,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(&vmx->vcpu, field, &field_value);
-
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- vmcs_write16(field, (u16)field_value);
- break;
- case VMCS_FIELD_TYPE_U32:
- vmcs_write32(field, (u32)field_value);
- break;
- case VMCS_FIELD_TYPE_U64:
- vmcs_write64(field, (u64)field_value);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- vmcs_writel(field, (long)field_value);
- break;
- default:
- WARN_ON(1);
- break;
- }
+ __vmcs_writel(field, field_value);
}
}
@@ -7788,8 +7771,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
unsigned long field;
gva_t gva;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+
/* The value to write might be 32 or 64 bits, depending on L1's long
* mode, and eventually we need to write that into a field of several
* possible lengths. The code below first zero-extends the value to 64
@@ -7832,6 +7817,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+ switch (field) {
+#define SHADOW_FIELD_RW(x) case x:
+#include "vmx_shadow_fields.h"
+ /*
+ * The fields that can be updated by L1 without a vmexit are
+ * always updated in the vmcs02, the others go down the slow
+ * path of prepare_vmcs02.
+ */
+ break;
+ default:
+ vmx->nested.dirty_vmcs12 = true;
+ break;
+ }
+
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -7846,6 +7845,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
__pa(vmx->vmcs01.shadow_vmcs));
vmx->nested.sync_shadow_vmcs = true;
}
+ vmx->nested.dirty_vmcs12 = true;
}
/* Emulate the VMPTRLD instruction */
@@ -8066,7 +8066,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -8260,6 +8260,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
+ [EXIT_REASON_GDTR_IDTR] = handle_desc,
+ [EXIT_REASON_LDTR_TR] = handle_desc,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
@@ -9069,36 +9071,23 @@ static void vmx_set_rvi(int vector)
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
- if (!is_guest_mode(vcpu)) {
- vmx_set_rvi(max_irr);
- return;
- }
-
- if (max_irr == -1)
- return;
-
- /*
- * In guest mode. If a vmexit is needed, vmx_check_nested_events
- * handles it.
- */
- if (nested_exit_on_intr(vcpu))
- return;
-
/*
- * Else, fall back to pre-APICv interrupt injection since L2
- * is run without virtual interrupt delivery.
+ * When running L2, updating RVI is only relevant when
+ * vmcs12 virtual-interrupt-delivery enabled.
+ * However, it can be enabled only when L1 also
+ * intercepts external-interrupts and in that case
+ * we should not update vmcs02 RVI but instead intercept
+ * interrupt. Therefore, do nothing when running L2.
*/
- if (!kvm_event_needs_reinjection(vcpu) &&
- vmx_interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, max_irr, false);
- vmx_inject_irq(vcpu);
- }
+ if (!is_guest_mode(vcpu))
+ vmx_set_rvi(max_irr);
}
static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
+ bool max_irr_updated;
WARN_ON(!vcpu->arch.apicv_active);
if (pi_test_on(&vmx->pi_desc)) {
@@ -9108,7 +9097,23 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
- max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+ max_irr_updated =
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
+
+ /*
+ * If we are running L2 and L1 has a new pending interrupt
+ * which can be injected, we should re-evaluate
+ * what should be done with this new L1 interrupt.
+ * If L1 intercepts external-interrupts, we should
+ * exit from L2 to L1. Otherwise, interrupt should be
+ * delivered directly to L2.
+ */
+ if (is_guest_mode(vcpu) && max_irr_updated) {
+ if (nested_exit_on_intr(vcpu))
+ kvm_vcpu_exiting_guest_mode(vcpu);
+ else
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
}
@@ -9223,6 +9228,12 @@ static bool vmx_xsaves_supported(void)
SECONDARY_EXEC_XSAVES;
}
+static bool vmx_umip_emulated(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_DESC;
+}
+
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -9378,7 +9389,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long debugctlmsr, cr3, cr4;
+ unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -9431,7 +9442,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vcpu->arch.pkru);
atomic_switch_perf_msrs(vmx);
- debugctlmsr = get_debugctlmsr();
vmx_arm_hv_timer(vcpu);
@@ -9587,8 +9597,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmexit_fill_RSB();
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
- if (debugctlmsr)
- update_debugctlmsr(debugctlmsr);
+ if (vmx->host_debugctlmsr)
+ update_debugctlmsr(vmx->host_debugctlmsr);
#ifndef CONFIG_X86_64
/*
@@ -9668,10 +9678,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
free_nested(vmx);
vcpu_put(vcpu);
@@ -9871,7 +9879,8 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
u32 mask =
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_DESC;
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -10037,8 +10046,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
}
}
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12);
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12);
static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
@@ -10127,11 +10136,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
(unsigned long)(vmcs12->posted_intr_desc_addr &
(PAGE_SIZE - 1)));
}
- if (cpu_has_vmx_msr_bitmap() &&
- nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
- ;
- else
+ if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_USE_MSR_BITMAPS);
}
@@ -10199,8 +10204,8 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
*/
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
{
int msr;
struct page *page;
@@ -10222,6 +10227,11 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ /* Nothing to do if the MSR bitmap is not in use. */
+ if (!cpu_has_vmx_msr_bitmap() ||
+ !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+ return false;
+
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
!pred_cmd && !spec_ctrl)
return false;
@@ -10229,32 +10239,41 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
if (is_error_page(page))
return false;
- msr_bitmap_l1 = (unsigned long *)kmap(page);
- memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
+ * just lets the processor take the value from the virtual-APIC page;
+ * take those 256 bits directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+ }
+ } else {
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap_l0[word] = ~0;
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+ }
+ }
- if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
- if (nested_cpu_has_apic_reg_virt(vmcs12))
- for (msr = 0x800; msr <= 0x8ff; msr++)
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- msr, MSR_TYPE_R);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_W);
+ if (nested_cpu_has_vid(vmcs12)) {
nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_R | MSR_TYPE_W);
-
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
- }
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
}
if (spec_ctrl)
@@ -10534,25 +10553,12 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
return 0;
}
-/*
- * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
- * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
- * guest in a way that will both be appropriate to L1's requests, and our
- * needs. In addition to modifying the active vmcs (which is vmcs02), this
- * function also has additional necessary side-effects, like setting various
- * vcpu->arch fields.
- * Returns 0 on success, 1 on failure. Invalid state exit qualification code
- * is assigned to entry_failure_code on failure.
- */
-static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- bool from_vmentry, u32 *entry_failure_code)
+static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 exec_control, vmcs12_exec_ctrl;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
@@ -10560,7 +10566,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
@@ -10570,15 +10575,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
@@ -10588,6 +10590,125 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs12->guest_pending_dbg_exceptions);
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+ if (nested_cpu_has_xsaves(vmcs12))
+ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
+
+ if (cpu_has_vmx_posted_intr())
+ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
+
+ /*
+ * Whether page-faults are trapped is determined by a combination of
+ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
+ * If enable_ept, L0 doesn't care about page faults and we should
+ * set all of these to L1's desires. However, if !enable_ept, L0 does
+ * care about (at least some) page faults, and because it is not easy
+ * (if at all possible?) to merge L0 and L1's desires, we simply ask
+ * to exit on each and every L2 page fault. This is done by setting
+ * MASK=MATCH=0 and (see below) EB.PF=1.
+ * Note that below we don't need special code to set EB.PF beyond the
+ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
+ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
+ * !enable_ept, EB.PF is 1, so the "or" will always be 1.
+ */
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+ enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+ enable_ept ? vmcs12->page_fault_error_code_match : 0);
+
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
+ if (cpu_has_vmx_vmfunc())
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
+ if (cpu_has_vmx_apicv()) {
+ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
+ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
+ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
+ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
+ }
+
+ /*
+ * Set host-state according to L0's settings (vmcs12 is irrelevant here)
+ * Some constant fields are set here by vmx_set_constant_host_state().
+ * Other fields are different per CPU, and will be set later when
+ * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
+ */
+ vmx_set_constant_host_state(vmx);
+
+ /*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ set_cr4_guest_host_mask(vmx);
+
+ if (vmx_mpx_supported())
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
+ if (enable_vpid) {
+ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+ else
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+ }
+
+ /*
+ * L1 may access the L2's PDPTR, so save them to construct vmcs12
+ */
+ if (enable_ept) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+}
+
+/*
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
+ * guest in a way that will both be appropriate to L1's requests, and our
+ * needs. In addition to modifying the active vmcs (which is vmcs02), this
+ * function also has additional necessary side-effects, like setting various
+ * vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry, u32 *entry_failure_code)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 exec_control, vmcs12_exec_ctrl;
+
+ /*
+ * First, the fields that are shadowed. This must be kept in sync
+ * with vmx_shadow_fields.h.
+ */
+
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+
+ /*
+ * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
+ * HOST_FS_BASE, HOST_GS_BASE.
+ */
+
if (from_vmentry &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
@@ -10610,16 +10731,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
} else {
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs12->guest_pending_dbg_exceptions);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
-
- if (nested_cpu_has_xsaves(vmcs12))
- vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
- vmcs_write64(VMCS_LINK_POINTER, -1ull);
exec_control = vmcs12->pin_based_vm_exec_control;
@@ -10633,7 +10745,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_posted_intr(vmcs12)) {
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
vmx->nested.pi_pending = false;
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
@@ -10644,25 +10755,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_preemption_timer(vmcs12))
vmx_start_preemption_timer(vcpu);
- /*
- * Whether page-faults are trapped is determined by a combination of
- * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
- * If enable_ept, L0 doesn't care about page faults and we should
- * set all of these to L1's desires. However, if !enable_ept, L0 does
- * care about (at least some) page faults, and because it is not easy
- * (if at all possible?) to merge L0 and L1's desires, we simply ask
- * to exit on each and every L2 page fault. This is done by setting
- * MASK=MATCH=0 and (see below) EB.PF=1.
- * Note that below we don't need special code to set EB.PF beyond the
- * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
- * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
- * !enable_ept, EB.PF is 1, so the "or" will always be 1.
- */
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
- enable_ept ? vmcs12->page_fault_error_code_mask : 0);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
- enable_ept ? vmcs12->page_fault_error_code_match : 0);
-
if (cpu_has_secondary_exec_ctrls()) {
exec_control = vmx->secondary_exec_control;
@@ -10681,22 +10773,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
- /* All VMFUNCs are currently emulated through L0 vmexits. */
- if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
- vmcs_write64(VM_FUNCTION_CONTROL, 0);
-
- if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
- vmcs_write64(EOI_EXIT_BITMAP0,
- vmcs12->eoi_exit_bitmap0);
- vmcs_write64(EOI_EXIT_BITMAP1,
- vmcs12->eoi_exit_bitmap1);
- vmcs_write64(EOI_EXIT_BITMAP2,
- vmcs12->eoi_exit_bitmap2);
- vmcs_write64(EOI_EXIT_BITMAP3,
- vmcs12->eoi_exit_bitmap3);
+ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
vmcs_write16(GUEST_INTR_STATUS,
vmcs12->guest_intr_status);
- }
/*
* Write an illegal value to APIC_ACCESS_ADDR. Later,
@@ -10709,24 +10788,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
-
- /*
- * Set host-state according to L0's settings (vmcs12 is irrelevant here)
- * Some constant fields are set here by vmx_set_constant_host_state().
- * Other fields are different per CPU, and will be set later when
- * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
- */
- vmx_set_constant_host_state(vmx);
-
- /*
- * Set the MSR load/store lists to match L0's settings.
- */
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
- vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
- vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
-
/*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
@@ -10758,8 +10819,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
/*
- * Merging of IO bitmap not currently supported.
- * Rather, exit every time.
+ * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
+ * for I/O port accesses.
*/
exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
@@ -10796,12 +10857,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
- set_cr4_guest_host_mask(vmx);
-
- if (from_vmentry &&
- vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
-
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vcpu->arch.tsc_offset + vmcs12->tsc_offset);
@@ -10810,9 +10865,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
- if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
-
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12, the
@@ -10823,16 +10875,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* even if spawn a lot of nested vCPUs.
*/
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
}
} else {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
-
}
if (enable_pml) {
@@ -10881,6 +10930,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
vmx_set_efer(vcpu, vcpu->arch.efer);
+ if (vmx->nested.dirty_vmcs12) {
+ prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+ vmx->nested.dirty_vmcs12 = false;
+ }
+
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
@@ -10889,16 +10943,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
- /*
- * L1 may access the L2's PDPTR, so save them to construct vmcs12
- */
- if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
- }
-
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
return 0;
@@ -11254,7 +11298,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
- vcpu->arch.exception.pending = false;
return 0;
}
@@ -11535,11 +11578,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* L1's vpid. TODO: move to a more elaborate solution, giving
* each L2 its own vpid and exposing the vpid feature to L1.
*/
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
- /* Restore posted intr vector. */
- if (nested_cpu_has_posted_intr(vmcs12))
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
@@ -11800,6 +11840,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+
+ /*
+ * RDPID causes #UD if disabled through secondary execution controls.
+ * Because it is marked as EmulateOnUD, we need to intercept it here.
+ */
+ if (info->intercept == x86_intercept_rdtscp &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->exception.error_code_valid = false;
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+
+ /* TODO: check more intercepts... */
return X86EMUL_CONTINUE;
}
@@ -12313,6 +12368,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.handle_external_intr = vmx_handle_external_intr,
.mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
+ .umip_emulated = vmx_umip_emulated,
.check_nested_events = vmx_check_nested_events,
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
new file mode 100644
index 000000000000..cd0c75f6d037
--- /dev/null
+++ b/arch/x86/kvm/vmx_shadow_fields.h
@@ -0,0 +1,77 @@
+#ifndef SHADOW_FIELD_RO
+#define SHADOW_FIELD_RO(x)
+#endif
+#ifndef SHADOW_FIELD_RW
+#define SHADOW_FIELD_RW(x)
+#endif
+
+/*
+ * We do NOT shadow fields that are modified when L0
+ * traps and emulates any vmx instruction (e.g. VMPTRLD,
+ * VMXON...) executed by L1.
+ * For example, VM_INSTRUCTION_ERROR is read
+ * by L1 if a vmx instruction fails (part of the error path).
+ * Note the code assumes this logic. If for some reason
+ * we start shadowing these fields then we need to
+ * force a shadow sync when L0 emulates vmx instructions
+ * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+ * by nested_vmx_failValid)
+ *
+ * When adding or removing fields here, note that shadowed
+ * fields must always be synced by prepare_vmcs02, not just
+ * prepare_vmcs02_full.
+ */
+
+/*
+ * Keeping the fields ordered by size is an attempt at improving
+ * branch prediction in vmcs_read_any and vmcs_write_any.
+ */
+
+/* 16-bits */
+SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
+SHADOW_FIELD_RW(GUEST_INTR_STATUS)
+SHADOW_FIELD_RW(GUEST_PML_INDEX)
+SHADOW_FIELD_RW(HOST_FS_SELECTOR)
+SHADOW_FIELD_RW(HOST_GS_SELECTOR)
+
+/* 32-bits */
+SHADOW_FIELD_RO(VM_EXIT_REASON)
+SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
+SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
+SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
+SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
+SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
+SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
+SHADOW_FIELD_RW(EXCEPTION_BITMAP)
+SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
+SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
+SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
+SHADOW_FIELD_RW(TPR_THRESHOLD)
+SHADOW_FIELD_RW(GUEST_CS_LIMIT)
+SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
+SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
+SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
+
+/* Natural width */
+SHADOW_FIELD_RO(EXIT_QUALIFICATION)
+SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
+SHADOW_FIELD_RW(GUEST_RIP)
+SHADOW_FIELD_RW(GUEST_RSP)
+SHADOW_FIELD_RW(GUEST_CR0)
+SHADOW_FIELD_RW(GUEST_CR3)
+SHADOW_FIELD_RW(GUEST_CR4)
+SHADOW_FIELD_RW(GUEST_RFLAGS)
+SHADOW_FIELD_RW(GUEST_CS_BASE)
+SHADOW_FIELD_RW(GUEST_ES_BASE)
+SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
+SHADOW_FIELD_RW(CR0_READ_SHADOW)
+SHADOW_FIELD_RW(CR4_READ_SHADOW)
+SHADOW_FIELD_RW(HOST_FS_BASE)
+SHADOW_FIELD_RW(HOST_GS_BASE)
+
+/* 64-bit */
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
+
+#undef SHADOW_FIELD_RO
+#undef SHADOW_FIELD_RW
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f9c5171dad2b..c8a0b545ac20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,8 @@
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
+#include <asm/mshyperv.h>
+#include <asm/hypervisor.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -177,7 +179,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "request_irq", VCPU_STAT(request_irq_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
- { "efer_reload", VCPU_STAT(efer_reload) },
{ "fpu_reload", VCPU_STAT(fpu_reload) },
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
@@ -702,7 +703,8 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
/* kvm_set_xcr() also depends on this */
- xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+ if (vcpu->arch.xcr0 != host_xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
vcpu->guest_xcr0_loaded = 1;
}
}
@@ -794,6 +796,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+ return 1;
+
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
@@ -1037,6 +1042,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
+ MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
};
@@ -1378,6 +1384,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+static inline int gtod_is_based_on_tsc(int mode)
+{
+ return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
+}
+
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
@@ -1397,7 +1408,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
* perform request to enable masterclock.
*/
if (ka->use_master_clock ||
- (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+ (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1460,6 +1471,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
vcpu->arch.tsc_offset = offset;
}
+static inline bool kvm_check_tsc_unstable(void)
+{
+#ifdef CONFIG_X86_64
+ /*
+ * TSC is marked unstable when we're running on Hyper-V,
+ * 'TSC page' clocksource is good.
+ */
+ if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
+ return false;
+#endif
+ return check_tsc_unstable();
+}
+
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct kvm *kvm = vcpu->kvm;
@@ -1505,7 +1529,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
*/
if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
- if (!check_tsc_unstable()) {
+ if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
@@ -1605,18 +1629,43 @@ static u64 read_tsc(void)
return last;
}
-static inline u64 vgettsc(u64 *cycle_now)
+static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
{
long v;
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+ u64 tsc_pg_val;
+
+ switch (gtod->clock.vclock_mode) {
+ case VCLOCK_HVCLOCK:
+ tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
+ tsc_timestamp);
+ if (tsc_pg_val != U64_MAX) {
+ /* TSC page valid */
+ *mode = VCLOCK_HVCLOCK;
+ v = (tsc_pg_val - gtod->clock.cycle_last) &
+ gtod->clock.mask;
+ } else {
+ /* TSC page invalid */
+ *mode = VCLOCK_NONE;
+ }
+ break;
+ case VCLOCK_TSC:
+ *mode = VCLOCK_TSC;
+ *tsc_timestamp = read_tsc();
+ v = (*tsc_timestamp - gtod->clock.cycle_last) &
+ gtod->clock.mask;
+ break;
+ default:
+ *mode = VCLOCK_NONE;
+ }
- *cycle_now = read_tsc();
+ if (*mode == VCLOCK_NONE)
+ *tsc_timestamp = v = 0;
- v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
return v * gtod->clock.mult;
}
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1625,9 +1674,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
do {
seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
+ ns += vgettsc(tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
ns += gtod->boot_ns;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1636,7 +1684,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
return mode;
}
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
+static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1645,10 +1693,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
do {
seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
+ ns += vgettsc(tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1658,25 +1705,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
return mode;
}
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
+/* returns true if host is using TSC based clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
{
/* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+ return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+ tsc_timestamp));
}
-/* returns true if host is using tsc clocksource */
+/* returns true if host is using TSC based clocksource */
static bool kvm_get_walltime_and_clockread(struct timespec *ts,
- u64 *cycle_now)
+ u64 *tsc_timestamp)
{
/* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+ return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
}
#endif
@@ -2119,6 +2167,12 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.pv_time_enabled = false;
}
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
+}
+
static void record_steal_time(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
@@ -2128,7 +2182,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
- vcpu->arch.st.steal.preempted = 0;
+ /*
+ * Doing a TLB flush here, on the guest's behalf, can avoid
+ * expensive IPIs.
+ */
+ if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb(vcpu, false);
if (vcpu->arch.st.steal.version & 1)
vcpu->arch.st.steal.version += 1; /* first time write, random junk */
@@ -2229,6 +2288,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_SMI_COUNT:
+ if (!msr_info->host_initiated)
+ return 1;
+ vcpu->arch.smi_count = data;
+ break;
case MSR_KVM_WALL_CLOCK_NEW:
case MSR_KVM_WALL_CLOCK:
vcpu->kvm->arch.wall_clock = data;
@@ -2503,6 +2567,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.smbase;
break;
+ case MSR_SMI_COUNT:
+ msr_info->data = vcpu->arch.smi_count;
+ break;
case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
msr_info->data = 1000ULL;
@@ -2870,13 +2937,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
- if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+ if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
rdtsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
- if (check_tsc_unstable()) {
+ if (kvm_check_tsc_unstable()) {
u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_vcpu_write_tsc_offset(vcpu, offset);
@@ -2905,7 +2972,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- vcpu->arch.st.steal.preempted = 1;
+ vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal.preempted,
@@ -2939,12 +3006,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
+ /*
+ * If userspace has set any breakpoints or watchpoints, dr6 is restored
+ * on every vmexit, but if not, we might have a stale dr6 from the
+ * guest. do_debug expects dr6 to be cleared after it runs, do the same.
+ */
+ set_debugreg(0, 6);
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
return kvm_apic_get_state(vcpu, s);
@@ -3473,6 +3546,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void *buffer;
} u;
+ vcpu_load(vcpu);
+
u.buffer = NULL;
switch (ioctl) {
case KVM_GET_LAPIC: {
@@ -3498,8 +3573,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = memdup_user(argp, sizeof(*u.lapic));
- if (IS_ERR(u.lapic))
- return PTR_ERR(u.lapic);
+ if (IS_ERR(u.lapic)) {
+ r = PTR_ERR(u.lapic);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
break;
@@ -3673,8 +3750,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XSAVE: {
u.xsave = memdup_user(argp, sizeof(*u.xsave));
- if (IS_ERR(u.xsave))
- return PTR_ERR(u.xsave);
+ if (IS_ERR(u.xsave)) {
+ r = PTR_ERR(u.xsave);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
@@ -3696,8 +3775,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XCRS: {
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
- if (IS_ERR(u.xcrs))
- return PTR_ERR(u.xcrs);
+ if (IS_ERR(u.xcrs)) {
+ r = PTR_ERR(u.xcrs);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
@@ -3741,6 +3822,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
out:
kfree(u.buffer);
+out_nofree:
+ vcpu_put(vcpu);
return r;
}
@@ -4297,6 +4380,36 @@ set_identity_unlock:
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
+ case KVM_MEMORY_ENCRYPT_OP: {
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_op)
+ r = kvm_x86_ops->mem_enc_op(kvm, argp);
+ break;
+ }
+ case KVM_MEMORY_ENCRYPT_REG_REGION: {
+ struct kvm_enc_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ goto out;
+
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_reg_region)
+ r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+ break;
+ }
+ case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+ struct kvm_enc_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ goto out;
+
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_unreg_region)
+ r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -5705,7 +5818,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
* handle watchpoints yet, those would be handled in
* the emulate_ops.
*/
- if (kvm_vcpu_check_breakpoint(vcpu, &r))
+ if (!(emulation_type & EMULTYPE_SKIP) &&
+ kvm_vcpu_check_breakpoint(vcpu, &r))
return r;
ctxt->interruptibility = 0;
@@ -5891,6 +6005,43 @@ static void tsc_khz_changed(void *data)
__this_cpu_write(cpu_tsc_khz, khz);
}
+#ifdef CONFIG_X86_64
+static void kvm_hyperv_tsc_notifier(void)
+{
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int cpu;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ kvm_make_mclock_inprogress_request(kvm);
+
+ hyperv_stop_tsc_emulation();
+
+ /* TSC frequency always matches when on Hyper-V */
+ for_each_present_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ kvm_max_guest_tsc_khz = tsc_khz;
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ struct kvm_arch *ka = &kvm->arch;
+
+ spin_lock(&ka->pvclock_gtod_sync_lock);
+
+ pvclock_update_vm_gtod_copy(kvm);
+
+ kvm_for_each_vcpu(cpu, vcpu, kvm)
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+ kvm_for_each_vcpu(cpu, vcpu, kvm)
+ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+ }
+ spin_unlock(&kvm_lock);
+}
+#endif
+
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -6112,9 +6263,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
update_pvclock_gtod(tk);
/* disable master clock if host does not trust, or does not
- * use, TSC clocksource
+ * use, TSC based clocksource.
*/
- if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
atomic_read(&kvm_guest_has_master_clock) != 0)
queue_work(system_long_wq, &pvclock_gtod_work);
@@ -6176,6 +6327,9 @@ int kvm_arch_init(void *opaque)
kvm_lapic_init();
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+
+ if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+ set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
#endif
return 0;
@@ -6188,6 +6342,10 @@ out:
void kvm_arch_exit(void)
{
+#ifdef CONFIG_X86_64
+ if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+ clear_hv_tscchange_cb();
+#endif
kvm_lapic_exit();
perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
@@ -6450,6 +6608,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
kvm_x86_ops->queue_exception(vcpu);
} else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
vcpu->arch.smi_pending = false;
+ ++vcpu->arch.smi_count;
enter_smm(vcpu);
} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
--vcpu->arch.nmi_pending;
@@ -6751,7 +6910,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -6760,12 +6919,6 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- kvm_x86_ops->tlb_flush(vcpu);
-}
-
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
@@ -6834,7 +6987,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_vcpu_flush_tlb(vcpu);
+ kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -6983,10 +7136,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* This handles the case where a posted interrupt was
* notified with kvm_vcpu_kick.
*/
- if (kvm_lapic_enabled(vcpu)) {
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
- }
+ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+ kvm_x86_ops->sync_pir_to_irr(vcpu);
if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
@@ -7007,7 +7158,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- wait_lapic_expire(vcpu);
+ if (lapic_timer_advance_ns)
+ wait_lapic_expire(vcpu);
guest_enter_irqoff();
if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -7268,8 +7420,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
+ vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
-
kvm_load_guest_fpu(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -7316,11 +7468,14 @@ out:
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
+
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
* We are here if userspace calls get_regs() in the middle of
@@ -7354,11 +7509,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
+
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7388,6 +7546,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ vcpu_put(vcpu);
return 0;
}
@@ -7406,6 +7565,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
{
struct desc_ptr dt;
+ vcpu_load(vcpu);
+
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7437,12 +7598,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ vcpu_load(vcpu);
+
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
vcpu->arch.pv.pv_unhalted)
@@ -7450,21 +7614,26 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
else
mp_state->mp_state = vcpu->arch.mp_state;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret = -EINVAL;
+
+ vcpu_load(vcpu);
+
if (!lapic_in_kernel(vcpu) &&
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
- return -EINVAL;
+ goto out;
/* INITs are latched while in SMM */
if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
(mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
- return -EINVAL;
+ goto out;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
@@ -7472,7 +7641,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
} else
vcpu->arch.mp_state = mp_state->mp_state;
kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
+
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
@@ -7526,18 +7699,21 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int mmu_reset_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
+ int ret = -EINVAL;
+
+ vcpu_load(vcpu);
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
- return -EINVAL;
+ goto out;
if (kvm_valid_sregs(vcpu, sregs))
- return -EINVAL;
+ goto out;
apic_base_msr.data = sregs->apic_base;
apic_base_msr.host_initiated = true;
if (kvm_set_apic_base(vcpu, &apic_base_msr))
- return -EINVAL;
+ goto out;
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
@@ -7603,7 +7779,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -7612,6 +7791,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
unsigned long rflags;
int i, r;
+ vcpu_load(vcpu);
+
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
@@ -7657,7 +7838,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
r = 0;
out:
-
+ vcpu_put(vcpu);
return r;
}
@@ -7671,6 +7852,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
gpa_t gpa;
int idx;
+ vcpu_load(vcpu);
+
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -7679,14 +7862,17 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
tr->writeable = 1;
tr->usermode = 0;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ struct fxregs_state *fxsave;
+
+ vcpu_load(vcpu);
+ fxsave = &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
@@ -7696,13 +7882,17 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ struct fxregs_state *fxsave;
+
+ vcpu_load(vcpu);
+
+ fxsave = &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -7713,6 +7903,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
+ vcpu_put(vcpu);
return 0;
}
@@ -7769,7 +7960,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+ if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
@@ -7781,16 +7972,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- int r;
-
kvm_vcpu_mtrr_init(vcpu);
- r = vcpu_load(vcpu);
- if (r)
- return r;
+ vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
- return r;
+ return 0;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -7800,13 +7987,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
kvm_hv_vcpu_postcreate(vcpu);
- if (vcpu_load(vcpu))
+ if (mutex_lock_killable(&vcpu->mutex))
return;
+ vcpu_load(vcpu);
msr.data = 0x0;
msr.index = MSR_IA32_TSC;
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync)
return;
@@ -7817,11 +8006,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- int r;
vcpu->arch.apf.msr_val = 0;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
@@ -7833,6 +8020,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
+ vcpu->arch.smi_count = 0;
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
@@ -7926,7 +8114,7 @@ int kvm_arch_hardware_enable(void)
return ret;
local_tsc = rdtsc();
- stable = !check_tsc_unstable();
+ stable = !kvm_check_tsc_unstable();
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!stable && vcpu->cpu == smp_processor_id())
@@ -8192,9 +8380,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d0b95b7a90b4..b91215d1fd80 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -12,6 +12,7 @@
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = false;
}
@@ -265,36 +266,8 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
static inline bool kvm_mwait_in_guest(void)
{
- unsigned int eax, ebx, ecx, edx;
-
- if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
- return false;
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- /* All AMD CPUs have a working MWAIT implementation */
- return true;
- case X86_VENDOR_INTEL:
- /* Handle Intel below */
- break;
- default:
- return false;
- }
-
- /*
- * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
- * they would allow guest to stop the CPU completely by disabling
- * interrupts then invoking MWAIT.
- */
- if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
- return false;
-
- cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
-
- if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
- return false;
-
- return true;
+ return boot_cpu_has(X86_FEATURE_MWAIT) &&
+ !boot_cpu_has_bug(X86_BUG_MONITOR);
}
#endif
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index fe7d57a8fb60..1555bd7d3449 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -678,6 +678,25 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
}
/**
+ * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
+ * of @pfn cannot be overridden by UC MTRR memory type.
+ *
+ * Only to be called when PAT is enabled.
+ *
+ * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC.
+ * Returns false in other cases.
+ */
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
+{
+ enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn));
+
+ return cm == _PAGE_CACHE_MODE_UC ||
+ cm == _PAGE_CACHE_MODE_UC_MINUS ||
+ cm == _PAGE_CACHE_MODE_WC;
+}
+EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
+
+/**
* io_reserve_memtype - Request a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
diff --git a/arch/xtensa/include/asm/kasan.h b/arch/xtensa/include/asm/kasan.h
index 54be80876e57..216b6f32c375 100644
--- a/arch/xtensa/include/asm/kasan.h
+++ b/arch/xtensa/include/asm/kasan.h
@@ -10,6 +10,8 @@
#include <linux/sizes.h>
#include <asm/kmem_layout.h>
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
/* Start of area covered by KASAN */
#define KASAN_START_VADDR __XTENSA_UL_CONST(0x90000000)
/* Start of the shadow map */
diff --git a/arch/xtensa/include/uapi/asm/poll.h b/arch/xtensa/include/uapi/asm/poll.h
index e3246d41182c..4d249040b33d 100644
--- a/arch/xtensa/include/uapi/asm/poll.h
+++ b/arch/xtensa/include/uapi/asm/poll.h
@@ -12,26 +12,9 @@
#ifndef _XTENSA_POLL_H
#define _XTENSA_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)0x0100
-#define POLLREMOVE (__force __poll_t)0x0800
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 0x0100
+#define POLLREMOVE 0x0800
#include <asm-generic/poll.h>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 47e6ec7427c4..aeca22d91101 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
}
/*
- * We exploit the bfq_finish_request hook to decrement
- * rq_in_driver, but bfq_finish_request will not be
- * invoked on this request. So, to avoid unbalance,
- * just start this request, without incrementing
- * rq_in_driver. As a negative consequence,
- * rq_in_driver is deceptively lower than it should be
- * while this request is in service. This may cause
- * bfq_schedule_dispatch to be invoked uselessly.
+ * We exploit the bfq_finish_requeue_request hook to
+ * decrement rq_in_driver, but
+ * bfq_finish_requeue_request will not be invoked on
+ * this request. So, to avoid unbalance, just start
+ * this request, without incrementing rq_in_driver. As
+ * a negative consequence, rq_in_driver is deceptively
+ * lower than it should be while this request is in
+ * service. This may cause bfq_schedule_dispatch to be
+ * invoked uselessly.
*
* As for implementing an exact solution, the
- * bfq_finish_request hook, if defined, is probably
- * invoked also on this request. So, by exploiting
- * this hook, we could 1) increment rq_in_driver here,
- * and 2) decrement it in bfq_finish_request. Such a
- * solution would let the value of the counter be
- * always accurate, but it would entail using an extra
- * interface function. This cost seems higher than the
- * benefit, being the frequency of non-elevator-private
+ * bfq_finish_requeue_request hook, if defined, is
+ * probably invoked also on this request. So, by
+ * exploiting this hook, we could 1) increment
+ * rq_in_driver here, and 2) decrement it in
+ * bfq_finish_requeue_request. Such a solution would
+ * let the value of the counter be always accurate,
+ * but it would entail using an extra interface
+ * function. This cost seems higher than the benefit,
+ * being the frequency of non-elevator-private
* requests very low.
*/
goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else {
+ if (WARN_ON_ONCE(!bfqq)) {
+ /*
+ * This should never happen. Most likely rq is
+ * a requeued regular request, being
+ * re-inserted without being first
+ * re-prepared. Do a prepare, to avoid
+ * failure.
+ */
+ bfq_prepare_request(rq, rq->bio);
+ bfqq = RQ_BFQQ(rq);
+ }
+
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/*
* Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_schedule_dispatch(bfqd);
}
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
{
bfqq->allocated--;
bfq_put_queue(bfqq);
}
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
{
- struct bfq_queue *bfqq;
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
struct bfq_data *bfqd;
- if (!rq->elv.icq)
+ /*
+ * Requeue and finish hooks are invoked in blk-mq without
+ * checking whether the involved request is actually still
+ * referenced in the scheduler. To handle this fact, the
+ * following two checks make this function exit in case of
+ * spurious invocations, for which there is nothing to do.
+ *
+ * First, check whether rq has nothing to do with an elevator.
+ */
+ if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+ return;
+
+ /*
+ * rq either is not associated with any icq, or is an already
+ * requeued request that has not (yet) been re-inserted into
+ * a bfq_queue.
+ */
+ if (!rq->elv.icq || !bfqq)
return;
- bfqq = RQ_BFQQ(rq);
bfqd = bfqq->bfqd;
if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
spin_lock_irqsave(&bfqd->lock, flags);
bfq_completed_request(bfqq, bfqd);
- bfq_finish_request_body(bfqq);
+ bfq_finish_requeue_request_body(bfqq);
spin_unlock_irqrestore(&bfqd->lock, flags);
} else {
/*
* Request rq may be still/already in the scheduler,
- * in which case we need to remove it. And we cannot
+ * in which case we need to remove it (this should
+ * never happen in case of requeue). And we cannot
* defer such a check and removal, to avoid
* inconsistencies in the time interval from the end
* of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
bfqg_stats_update_io_remove(bfqq_group(bfqq),
rq->cmd_flags);
}
- bfq_finish_request_body(bfqq);
+ bfq_finish_requeue_request_body(bfqq);
}
+ /*
+ * Reset private fields. In case of a requeue, this allows
+ * this function to correctly do nothing if it is spuriously
+ * invoked again on this same request (see the check at the
+ * beginning of the function). Probably, a better general
+ * design would be to prevent blk-mq from invoking the requeue
+ * or finish hooks of an elevator, for a request that is not
+ * referred by that elevator.
+ *
+ * Resetting the following fields would break the
+ * request-insertion logic if rq is re-inserted into a bfq
+ * internal queue, without a re-preparation. Here we assume
+ * that re-insertions of requeued requests, without
+ * re-preparation, can happen only for pass_through or at_head
+ * requests (which are not re-inserted into bfq internal
+ * queues).
+ */
rq->elv.priv[0] = NULL;
rq->elv.priv[1] = NULL;
}
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
.ops.mq = {
.limit_depth = bfq_limit_depth,
.prepare_request = bfq_prepare_request,
- .finish_request = bfq_finish_request,
+ .requeue_request = bfq_finish_requeue_request,
+ .finish_request = bfq_finish_requeue_request,
.exit_icq = bfq_exit_icq,
.insert_requests = bfq_insert_requests,
.dispatch_request = bfq_dispatch_request,
diff --git a/block/blk-core.c b/block/blk-core.c
index d0d104268f1a..2d1a7bbe0634 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
+#include <linux/bpf.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -2083,6 +2084,14 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
return false;
}
+static noinline int should_fail_bio(struct bio *bio)
+{
+ if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+ return -EIO;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
+
/*
* Remap block n of partition p to block n+start(p) of the disk.
*/
@@ -2174,7 +2183,7 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
goto not_supported;
- if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+ if (should_fail_bio(bio))
goto end_io;
if (!bio->bi_partno) {
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index ae8de9780085..f92fc84b5e2c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -697,7 +697,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
static int wbt_data_dir(const struct request *rq)
{
- return rq_data_dir(rq);
+ const int op = req_op(rq);
+
+ if (op == REQ_OP_READ)
+ return READ;
+ else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+ return WRITE;
+
+ /* don't account */
+ return -1;
}
int wbt_init(struct request_queue *q)
diff --git a/block/bsg.c b/block/bsg.c
index 2e2c1e222209..06dc96e1f670 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -849,9 +849,9 @@ static __poll_t bsg_poll(struct file *file, poll_table *wait)
spin_lock_irq(&bd->lock);
if (!list_empty(&bd->done_list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (bd->queued_cmds < bd->max_queue)
- mask |= POLLOUT;
+ mask |= EPOLLOUT;
spin_unlock_irq(&bd->lock);
return mask;
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 0f8d8d5523c3..c49766b03165 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -735,9 +735,9 @@ void af_alg_wmem_wakeup(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
- POLLRDNORM |
- POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+ EPOLLRDNORM |
+ EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@@ -800,9 +800,9 @@ void af_alg_data_wakeup(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
- POLLRDNORM |
- POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLRDNORM |
+ EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
@@ -1076,10 +1076,10 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock,
mask = 0;
if (!ctx->more || ctx->used)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (af_alg_writable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c
index 2ff5c8c04e3b..f21c99ec46ee 100644
--- a/drivers/acpi/acpi_dbg.c
+++ b/drivers/acpi/acpi_dbg.c
@@ -724,9 +724,9 @@ static __poll_t acpi_aml_poll(struct file *file, poll_table *wait)
poll_wait(file, &acpi_aml_io.wait, wait);
if (acpi_aml_user_readable())
- masks |= POLLIN | POLLRDNORM;
+ masks |= EPOLLIN | EPOLLRDNORM;
if (acpi_aml_user_writable())
- masks |= POLLOUT | POLLWRNORM;
+ masks |= EPOLLOUT | EPOLLWRNORM;
return masks;
}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d21040c5d343..15e3d3c2260d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4371,7 +4371,7 @@ static int binder_thread_release(struct binder_proc *proc,
*/
if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
waitqueue_active(&thread->wait)) {
- wake_up_poll(&thread->wait, POLLHUP | POLLFREE);
+ wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE);
}
binder_inner_proc_unlock(thread->proc);
@@ -4401,7 +4401,7 @@ static __poll_t binder_poll(struct file *filp,
poll_wait(filp, &thread->wait, wait);
if (binder_has_work(thread, wait_for_proc_work))
- return POLLIN;
+ return EPOLLIN;
return 0;
}
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 0521748a1972..22f9145a426f 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -306,9 +306,9 @@ static __poll_t vhci_poll(struct file *file, poll_table *wait)
poll_wait(file, &data->read_wait, wait);
if (!skb_queue_empty(&data->readq))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
- return POLLOUT | POLLWRNORM;
+ return EPOLLOUT | EPOLLWRNORM;
}
static void vhci_open_timeout(struct work_struct *work)
diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c
index a2a1c1478cd0..a5e2f9e557ea 100644
--- a/drivers/char/apm-emulation.c
+++ b/drivers/char/apm-emulation.c
@@ -241,7 +241,7 @@ static __poll_t apm_poll(struct file *fp, poll_table * wait)
struct apm_user *as = fp->private_data;
poll_wait(fp, &apm_waitqueue, wait);
- return queue_empty(&as->queue) ? 0 : POLLIN | POLLRDNORM;
+ return queue_empty(&as->queue) ? 0 : EPOLLIN | EPOLLRDNORM;
}
/*
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 2f92cc46698b..06749e295ada 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -414,7 +414,7 @@ static __poll_t dsp56k_poll(struct file *file, poll_table *wait)
{
case DSP56K_DEV_56001:
/* poll_wait(file, ???, wait); */
- return POLLIN | POLLRDNORM | POLLOUT;
+ return EPOLLIN | EPOLLRDNORM | EPOLLOUT;
default:
printk("DSP56k driver: Unknown minor device: %d\n", dev);
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index 2697c22e3be2..f882460b5a44 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -62,7 +62,7 @@
#include <linux/uaccess.h> /* for get_user, etc. */
#include <linux/wait.h> /* for wait_queue */
#include <linux/init.h> /* for __init, module_{init,exit} */
-#include <linux/poll.h> /* for POLLIN, etc. */
+#include <linux/poll.h> /* for EPOLLIN, etc. */
#include <linux/dtlk.h> /* local header file for DoubleTalk values */
#ifdef TRACING
@@ -244,11 +244,11 @@ static __poll_t dtlk_poll(struct file *file, poll_table * wait)
if (dtlk_has_indexing && dtlk_readable()) {
del_timer(&dtlk_timer);
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
}
if (dtlk_writeable()) {
del_timer(&dtlk_timer);
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
/* there are no exception conditions */
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index dbed4953f86c..be426eb2a353 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -359,7 +359,7 @@ static __poll_t hpet_poll(struct file *file, poll_table * wait)
spin_unlock_irq(&hpet_lock);
if (v != 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
index 7992c870b0a2..c95b93b7598b 100644
--- a/drivers/char/ipmi/bt-bmc.c
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -349,10 +349,10 @@ static __poll_t bt_bmc_poll(struct file *file, poll_table *wait)
ctrl = bt_inb(bt_bmc, BT_CTRL);
if (ctrl & BT_CTRL_H2B_ATN)
- mask |= POLLIN;
+ mask |= EPOLLIN;
if (!(ctrl & (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN)))
- mask |= POLLOUT;
+ mask |= EPOLLOUT;
return mask;
}
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index a011a7739f5e..5f1bc9174735 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -89,7 +89,7 @@ static __poll_t ipmi_poll(struct file *file, poll_table *wait)
spin_lock_irqsave(&priv->recv_msg_lock, flags);
if (!list_empty(&(priv->recv_msgs)))
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
spin_unlock_irqrestore(&priv->recv_msg_lock, flags);
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 34bc1f3ca414..a58acdcf7414 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -895,7 +895,7 @@ static __poll_t ipmi_poll(struct file *file, poll_table *wait)
spin_lock(&ipmi_read_lock);
if (data_to_read)
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
spin_unlock(&ipmi_read_lock);
return mask;
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index 819fe37a3683..f80965407d3c 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -423,9 +423,9 @@ static __poll_t cm4040_poll(struct file *filp, poll_table *wait)
poll_wait(filp, &dev->poll_wait, wait);
if (test_and_clear_bit(BS_READABLE, &dev->buffer_status))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (test_and_clear_bit(BS_WRITABLE, &dev->buffer_status))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
DEBUGP(2, dev, "<- cm4040_poll(%u)\n", mask);
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 7a56d1a13ec3..1ae77b41050a 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -776,7 +776,7 @@ static __poll_t pp_poll(struct file *file, poll_table *wait)
poll_wait(file, &pp->irq_wait, wait);
if (atomic_read(&pp->irqc))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 80f2c326db47..e5b3d3ba4660 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1793,9 +1793,9 @@ random_poll(struct file *file, poll_table * wait)
poll_wait(file, &random_write_wait, wait);
mask = 0;
if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index c6a317120a55..0c858d027bf3 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -804,7 +804,7 @@ static __poll_t rtc_poll(struct file *file, poll_table *wait)
spin_unlock_irq(&rtc_lock);
if (l != 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
#endif
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 7f49fa0f41d7..5918ea7499bb 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -340,10 +340,10 @@ scdrv_poll(struct file *file, struct poll_table_struct *wait)
if (status > 0) {
if (status & SAL_IROUTER_INTR_RECV) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (status & SAL_IROUTER_INTR_XMIT) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index fc041c462aa4..186689833231 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -944,7 +944,7 @@ static __poll_t sonypi_misc_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &sonypi_device.fifo_proc_list, wait);
if (kfifo_len(&sonypi_device.fifo))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 674218b50b13..e4f79f920450 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -180,15 +180,15 @@ static __poll_t vtpm_proxy_fops_poll(struct file *filp, poll_table *wait)
poll_wait(filp, &proxy_dev->wq, wait);
- ret = POLLOUT;
+ ret = EPOLLOUT;
mutex_lock(&proxy_dev->buf_lock);
if (proxy_dev->req_len)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
if (!(proxy_dev->state & STATE_OPENED_FLAG))
- ret |= POLLHUP;
+ ret |= EPOLLHUP;
mutex_unlock(&proxy_dev->buf_lock);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 813a2e46824d..468f06134012 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -992,15 +992,15 @@ static __poll_t port_fops_poll(struct file *filp, poll_table *wait)
if (!port->guest_connected) {
/* Port got unplugged */
- return POLLHUP;
+ return EPOLLHUP;
}
ret = 0;
if (!will_read_block(port))
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
if (!will_write_block(port))
- ret |= POLLOUT;
+ ret |= EPOLLOUT;
if (!port->host_connected)
- ret |= POLLHUP;
+ ret |= EPOLLHUP;
return ret;
}
diff --git a/drivers/char/xillybus/xillybus_core.c b/drivers/char/xillybus/xillybus_core.c
index 88e1cf475d3f..a11af94e2e65 100644
--- a/drivers/char/xillybus/xillybus_core.c
+++ b/drivers/char/xillybus/xillybus_core.c
@@ -1758,15 +1758,15 @@ static __poll_t xillybus_poll(struct file *filp, poll_table *wait)
spin_lock_irqsave(&channel->wr_spinlock, flags);
if (!channel->wr_empty || channel->wr_ready)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (channel->wr_hangup)
/*
- * Not POLLHUP, because its behavior is in the
- * mist, and POLLIN does what we want: Wake up
+ * Not EPOLLHUP, because its behavior is in the
+ * mist, and EPOLLIN does what we want: Wake up
* the read file descriptor so it sees EOF.
*/
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&channel->wr_spinlock, flags);
}
@@ -1781,12 +1781,12 @@ static __poll_t xillybus_poll(struct file *filp, poll_table *wait)
spin_lock_irqsave(&channel->rd_spinlock, flags);
if (!channel->rd_full)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
spin_unlock_irqrestore(&channel->rd_spinlock, flags);
}
if (channel->endpoint->fatal_error)
- mask |= POLLERR;
+ mask |= EPOLLERR;
return mask;
}
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 6d626606b9c5..b9dfae47aefd 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -1,5 +1,6 @@
config CRYPTO_DEV_CCP_DD
tristate "Secure Processor device driver"
+ depends on CPU_SUP_AMD || ARM64
default m
help
Provides AMD Secure Processor device driver.
@@ -32,3 +33,14 @@ config CRYPTO_DEV_CCP_CRYPTO
Support for using the cryptographic API with the AMD Cryptographic
Coprocessor. This module supports offload of SHA and AES algorithms.
If you choose 'M' here, this module will be called ccp_crypto.
+
+config CRYPTO_DEV_SP_PSP
+ bool "Platform Security Processor (PSP) device"
+ default y
+ depends on CRYPTO_DEV_CCP_DD && X86_64
+ help
+ Provide support for the AMD Platform Security Processor (PSP).
+ The PSP is a dedicated processor that provides support for key
+ management commands in Secure Encrypted Virtualization (SEV) mode,
+ along with software-based Trusted Execution Environment (TEE) to
+ enable third-party trusted applications.
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index c4ce726b931e..51d1c0cf66c7 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -8,6 +8,7 @@ ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
ccp-dmaengine.o \
ccp-debugfs.o
ccp-$(CONFIG_PCI) += sp-pci.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o
obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
new file mode 100644
index 000000000000..fcfa5b1eae61
--- /dev/null
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -0,0 +1,805 @@
+/*
+ * AMD Platform Security Processor (PSP) interface
+ *
+ * Copyright (C) 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/ccp.h>
+
+#include "sp-dev.h"
+#include "psp-dev.h"
+
+#define DEVICE_NAME "sev"
+
+static DEFINE_MUTEX(sev_cmd_mutex);
+static struct sev_misc_dev *misc_dev;
+static struct psp_device *psp_master;
+
+static struct psp_device *psp_alloc_struct(struct sp_device *sp)
+{
+ struct device *dev = sp->dev;
+ struct psp_device *psp;
+
+ psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL);
+ if (!psp)
+ return NULL;
+
+ psp->dev = dev;
+ psp->sp = sp;
+
+ snprintf(psp->name, sizeof(psp->name), "psp-%u", sp->ord);
+
+ return psp;
+}
+
+static irqreturn_t psp_irq_handler(int irq, void *data)
+{
+ struct psp_device *psp = data;
+ unsigned int status;
+ int reg;
+
+ /* Read the interrupt status: */
+ status = ioread32(psp->io_regs + PSP_P2CMSG_INTSTS);
+
+ /* Check if it is command completion: */
+ if (!(status & BIT(PSP_CMD_COMPLETE_REG)))
+ goto done;
+
+ /* Check if it is SEV command completion: */
+ reg = ioread32(psp->io_regs + PSP_CMDRESP);
+ if (reg & PSP_CMDRESP_RESP) {
+ psp->sev_int_rcvd = 1;
+ wake_up(&psp->sev_int_queue);
+ }
+
+done:
+ /* Clear the interrupt status by writing the same value we read. */
+ iowrite32(status, psp->io_regs + PSP_P2CMSG_INTSTS);
+
+ return IRQ_HANDLED;
+}
+
+static void sev_wait_cmd_ioc(struct psp_device *psp, unsigned int *reg)
+{
+ psp->sev_int_rcvd = 0;
+
+ wait_event(psp->sev_int_queue, psp->sev_int_rcvd);
+ *reg = ioread32(psp->io_regs + PSP_CMDRESP);
+}
+
+static int sev_cmd_buffer_len(int cmd)
+{
+ switch (cmd) {
+ case SEV_CMD_INIT: return sizeof(struct sev_data_init);
+ case SEV_CMD_PLATFORM_STATUS: return sizeof(struct sev_user_data_status);
+ case SEV_CMD_PEK_CSR: return sizeof(struct sev_data_pek_csr);
+ case SEV_CMD_PEK_CERT_IMPORT: return sizeof(struct sev_data_pek_cert_import);
+ case SEV_CMD_PDH_CERT_EXPORT: return sizeof(struct sev_data_pdh_cert_export);
+ case SEV_CMD_LAUNCH_START: return sizeof(struct sev_data_launch_start);
+ case SEV_CMD_LAUNCH_UPDATE_DATA: return sizeof(struct sev_data_launch_update_data);
+ case SEV_CMD_LAUNCH_UPDATE_VMSA: return sizeof(struct sev_data_launch_update_vmsa);
+ case SEV_CMD_LAUNCH_FINISH: return sizeof(struct sev_data_launch_finish);
+ case SEV_CMD_LAUNCH_MEASURE: return sizeof(struct sev_data_launch_measure);
+ case SEV_CMD_ACTIVATE: return sizeof(struct sev_data_activate);
+ case SEV_CMD_DEACTIVATE: return sizeof(struct sev_data_deactivate);
+ case SEV_CMD_DECOMMISSION: return sizeof(struct sev_data_decommission);
+ case SEV_CMD_GUEST_STATUS: return sizeof(struct sev_data_guest_status);
+ case SEV_CMD_DBG_DECRYPT: return sizeof(struct sev_data_dbg);
+ case SEV_CMD_DBG_ENCRYPT: return sizeof(struct sev_data_dbg);
+ case SEV_CMD_SEND_START: return sizeof(struct sev_data_send_start);
+ case SEV_CMD_SEND_UPDATE_DATA: return sizeof(struct sev_data_send_update_data);
+ case SEV_CMD_SEND_UPDATE_VMSA: return sizeof(struct sev_data_send_update_vmsa);
+ case SEV_CMD_SEND_FINISH: return sizeof(struct sev_data_send_finish);
+ case SEV_CMD_RECEIVE_START: return sizeof(struct sev_data_receive_start);
+ case SEV_CMD_RECEIVE_FINISH: return sizeof(struct sev_data_receive_finish);
+ case SEV_CMD_RECEIVE_UPDATE_DATA: return sizeof(struct sev_data_receive_update_data);
+ case SEV_CMD_RECEIVE_UPDATE_VMSA: return sizeof(struct sev_data_receive_update_vmsa);
+ case SEV_CMD_LAUNCH_UPDATE_SECRET: return sizeof(struct sev_data_launch_secret);
+ default: return 0;
+ }
+
+ return 0;
+}
+
+static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
+{
+ struct psp_device *psp = psp_master;
+ unsigned int phys_lsb, phys_msb;
+ unsigned int reg, ret = 0;
+
+ if (!psp)
+ return -ENODEV;
+
+ /* Get the physical address of the command buffer */
+ phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
+ phys_msb = data ? upper_32_bits(__psp_pa(data)) : 0;
+
+ dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x\n",
+ cmd, phys_msb, phys_lsb);
+
+ print_hex_dump_debug("(in): ", DUMP_PREFIX_OFFSET, 16, 2, data,
+ sev_cmd_buffer_len(cmd), false);
+
+ iowrite32(phys_lsb, psp->io_regs + PSP_CMDBUFF_ADDR_LO);
+ iowrite32(phys_msb, psp->io_regs + PSP_CMDBUFF_ADDR_HI);
+
+ reg = cmd;
+ reg <<= PSP_CMDRESP_CMD_SHIFT;
+ reg |= PSP_CMDRESP_IOC;
+ iowrite32(reg, psp->io_regs + PSP_CMDRESP);
+
+ /* wait for command completion */
+ sev_wait_cmd_ioc(psp, &reg);
+
+ if (psp_ret)
+ *psp_ret = reg & PSP_CMDRESP_ERR_MASK;
+
+ if (reg & PSP_CMDRESP_ERR_MASK) {
+ dev_dbg(psp->dev, "sev command %#x failed (%#010x)\n",
+ cmd, reg & PSP_CMDRESP_ERR_MASK);
+ ret = -EIO;
+ }
+
+ print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
+ sev_cmd_buffer_len(cmd), false);
+
+ return ret;
+}
+
+static int sev_do_cmd(int cmd, void *data, int *psp_ret)
+{
+ int rc;
+
+ mutex_lock(&sev_cmd_mutex);
+ rc = __sev_do_cmd_locked(cmd, data, psp_ret);
+ mutex_unlock(&sev_cmd_mutex);
+
+ return rc;
+}
+
+static int __sev_platform_init_locked(int *error)
+{
+ struct psp_device *psp = psp_master;
+ int rc = 0;
+
+ if (!psp)
+ return -ENODEV;
+
+ if (psp->sev_state == SEV_STATE_INIT)
+ return 0;
+
+ rc = __sev_do_cmd_locked(SEV_CMD_INIT, &psp->init_cmd_buf, error);
+ if (rc)
+ return rc;
+
+ psp->sev_state = SEV_STATE_INIT;
+ dev_dbg(psp->dev, "SEV firmware initialized\n");
+
+ return rc;
+}
+
+int sev_platform_init(int *error)
+{
+ int rc;
+
+ mutex_lock(&sev_cmd_mutex);
+ rc = __sev_platform_init_locked(error);
+ mutex_unlock(&sev_cmd_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(sev_platform_init);
+
+static int __sev_platform_shutdown_locked(int *error)
+{
+ int ret;
+
+ ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error);
+ if (ret)
+ return ret;
+
+ psp_master->sev_state = SEV_STATE_UNINIT;
+ dev_dbg(psp_master->dev, "SEV firmware shutdown\n");
+
+ return ret;
+}
+
+static int sev_platform_shutdown(int *error)
+{
+ int rc;
+
+ mutex_lock(&sev_cmd_mutex);
+ rc = __sev_platform_shutdown_locked(NULL);
+ mutex_unlock(&sev_cmd_mutex);
+
+ return rc;
+}
+
+static int sev_get_platform_state(int *state, int *error)
+{
+ int rc;
+
+ rc = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS,
+ &psp_master->status_cmd_buf, error);
+ if (rc)
+ return rc;
+
+ *state = psp_master->status_cmd_buf.state;
+ return rc;
+}
+
+static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
+{
+ int state, rc;
+
+ /*
+ * The SEV spec requires that FACTORY_RESET must be issued in
+ * UNINIT state. Before we go further lets check if any guest is
+ * active.
+ *
+ * If FW is in WORKING state then deny the request otherwise issue
+ * SHUTDOWN command do INIT -> UNINIT before issuing the FACTORY_RESET.
+ *
+ */
+ rc = sev_get_platform_state(&state, &argp->error);
+ if (rc)
+ return rc;
+
+ if (state == SEV_STATE_WORKING)
+ return -EBUSY;
+
+ if (state == SEV_STATE_INIT) {
+ rc = __sev_platform_shutdown_locked(&argp->error);
+ if (rc)
+ return rc;
+ }
+
+ return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error);
+}
+
+static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
+{
+ struct sev_user_data_status *data = &psp_master->status_cmd_buf;
+ int ret;
+
+ ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, data, &argp->error);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)argp->data, data, sizeof(*data)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
+{
+ int rc;
+
+ if (psp_master->sev_state == SEV_STATE_UNINIT) {
+ rc = __sev_platform_init_locked(&argp->error);
+ if (rc)
+ return rc;
+ }
+
+ return __sev_do_cmd_locked(cmd, 0, &argp->error);
+}
+
+static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
+{
+ struct sev_user_data_pek_csr input;
+ struct sev_data_pek_csr *data;
+ void *blob = NULL;
+ int ret;
+
+ if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* userspace wants to query CSR length */
+ if (!input.address || !input.length)
+ goto cmd;
+
+ /* allocate a physically contiguous buffer to store the CSR blob */
+ if (!access_ok(VERIFY_WRITE, input.address, input.length) ||
+ input.length > SEV_FW_BLOB_MAX_SIZE) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+
+ blob = kmalloc(input.length, GFP_KERNEL);
+ if (!blob) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ data->address = __psp_pa(blob);
+ data->len = input.length;
+
+cmd:
+ if (psp_master->sev_state == SEV_STATE_UNINIT) {
+ ret = __sev_platform_init_locked(&argp->error);
+ if (ret)
+ goto e_free_blob;
+ }
+
+ ret = __sev_do_cmd_locked(SEV_CMD_PEK_CSR, data, &argp->error);
+
+ /* If we query the CSR length, FW responded with expected data. */
+ input.length = data->len;
+
+ if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+ ret = -EFAULT;
+ goto e_free_blob;
+ }
+
+ if (blob) {
+ if (copy_to_user((void __user *)input.address, blob, input.length))
+ ret = -EFAULT;
+ }
+
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+void *psp_copy_user_blob(u64 __user uaddr, u32 len)
+{
+ void *data;
+
+ if (!uaddr || !len)
+ return ERR_PTR(-EINVAL);
+
+ /* verify that blob length does not exceed our limit */
+ if (len > SEV_FW_BLOB_MAX_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ data = kmalloc(len, GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(data, (void __user *)(uintptr_t)uaddr, len))
+ goto e_free;
+
+ return data;
+
+e_free:
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+}
+EXPORT_SYMBOL_GPL(psp_copy_user_blob);
+
+static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
+{
+ struct sev_user_data_pek_cert_import input;
+ struct sev_data_pek_cert_import *data;
+ void *pek_blob, *oca_blob;
+ int ret;
+
+ if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* copy PEK certificate blobs from userspace */
+ pek_blob = psp_copy_user_blob(input.pek_cert_address, input.pek_cert_len);
+ if (IS_ERR(pek_blob)) {
+ ret = PTR_ERR(pek_blob);
+ goto e_free;
+ }
+
+ data->pek_cert_address = __psp_pa(pek_blob);
+ data->pek_cert_len = input.pek_cert_len;
+
+ /* copy PEK certificate blobs from userspace */
+ oca_blob = psp_copy_user_blob(input.oca_cert_address, input.oca_cert_len);
+ if (IS_ERR(oca_blob)) {
+ ret = PTR_ERR(oca_blob);
+ goto e_free_pek;
+ }
+
+ data->oca_cert_address = __psp_pa(oca_blob);
+ data->oca_cert_len = input.oca_cert_len;
+
+ /* If platform is not in INIT state then transition it to INIT */
+ if (psp_master->sev_state != SEV_STATE_INIT) {
+ ret = __sev_platform_init_locked(&argp->error);
+ if (ret)
+ goto e_free_oca;
+ }
+
+ ret = __sev_do_cmd_locked(SEV_CMD_PEK_CERT_IMPORT, data, &argp->error);
+
+e_free_oca:
+ kfree(oca_blob);
+e_free_pek:
+ kfree(pek_blob);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
+{
+ struct sev_user_data_pdh_cert_export input;
+ void *pdh_blob = NULL, *cert_blob = NULL;
+ struct sev_data_pdh_cert_export *data;
+ int ret;
+
+ if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* Userspace wants to query the certificate length. */
+ if (!input.pdh_cert_address ||
+ !input.pdh_cert_len ||
+ !input.cert_chain_address)
+ goto cmd;
+
+ /* Allocate a physically contiguous buffer to store the PDH blob. */
+ if ((input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) ||
+ !access_ok(VERIFY_WRITE, input.pdh_cert_address, input.pdh_cert_len)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+
+ /* Allocate a physically contiguous buffer to store the cert chain blob. */
+ if ((input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) ||
+ !access_ok(VERIFY_WRITE, input.cert_chain_address, input.cert_chain_len)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+
+ pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
+ if (!pdh_blob) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ data->pdh_cert_address = __psp_pa(pdh_blob);
+ data->pdh_cert_len = input.pdh_cert_len;
+
+ cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
+ if (!cert_blob) {
+ ret = -ENOMEM;
+ goto e_free_pdh;
+ }
+
+ data->cert_chain_address = __psp_pa(cert_blob);
+ data->cert_chain_len = input.cert_chain_len;
+
+cmd:
+ /* If platform is not in INIT state then transition it to INIT. */
+ if (psp_master->sev_state != SEV_STATE_INIT) {
+ ret = __sev_platform_init_locked(&argp->error);
+ if (ret)
+ goto e_free_cert;
+ }
+
+ ret = __sev_do_cmd_locked(SEV_CMD_PDH_CERT_EXPORT, data, &argp->error);
+
+ /* If we query the length, FW responded with expected data. */
+ input.cert_chain_len = data->cert_chain_len;
+ input.pdh_cert_len = data->pdh_cert_len;
+
+ if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+ ret = -EFAULT;
+ goto e_free_cert;
+ }
+
+ if (pdh_blob) {
+ if (copy_to_user((void __user *)input.pdh_cert_address,
+ pdh_blob, input.pdh_cert_len)) {
+ ret = -EFAULT;
+ goto e_free_cert;
+ }
+ }
+
+ if (cert_blob) {
+ if (copy_to_user((void __user *)input.cert_chain_address,
+ cert_blob, input.cert_chain_len))
+ ret = -EFAULT;
+ }
+
+e_free_cert:
+ kfree(cert_blob);
+e_free_pdh:
+ kfree(pdh_blob);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct sev_issue_cmd input;
+ int ret = -EFAULT;
+
+ if (!psp_master)
+ return -ENODEV;
+
+ if (ioctl != SEV_ISSUE_CMD)
+ return -EINVAL;
+
+ if (copy_from_user(&input, argp, sizeof(struct sev_issue_cmd)))
+ return -EFAULT;
+
+ if (input.cmd > SEV_MAX)
+ return -EINVAL;
+
+ mutex_lock(&sev_cmd_mutex);
+
+ switch (input.cmd) {
+
+ case SEV_FACTORY_RESET:
+ ret = sev_ioctl_do_reset(&input);
+ break;
+ case SEV_PLATFORM_STATUS:
+ ret = sev_ioctl_do_platform_status(&input);
+ break;
+ case SEV_PEK_GEN:
+ ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input);
+ break;
+ case SEV_PDH_GEN:
+ ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input);
+ break;
+ case SEV_PEK_CSR:
+ ret = sev_ioctl_do_pek_csr(&input);
+ break;
+ case SEV_PEK_CERT_IMPORT:
+ ret = sev_ioctl_do_pek_import(&input);
+ break;
+ case SEV_PDH_CERT_EXPORT:
+ ret = sev_ioctl_do_pdh_export(&input);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (copy_to_user(argp, &input, sizeof(struct sev_issue_cmd)))
+ ret = -EFAULT;
+out:
+ mutex_unlock(&sev_cmd_mutex);
+
+ return ret;
+}
+
+static const struct file_operations sev_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = sev_ioctl,
+};
+
+int sev_platform_status(struct sev_user_data_status *data, int *error)
+{
+ return sev_do_cmd(SEV_CMD_PLATFORM_STATUS, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_platform_status);
+
+int sev_guest_deactivate(struct sev_data_deactivate *data, int *error)
+{
+ return sev_do_cmd(SEV_CMD_DEACTIVATE, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_deactivate);
+
+int sev_guest_activate(struct sev_data_activate *data, int *error)
+{
+ return sev_do_cmd(SEV_CMD_ACTIVATE, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_activate);
+
+int sev_guest_decommission(struct sev_data_decommission *data, int *error)
+{
+ return sev_do_cmd(SEV_CMD_DECOMMISSION, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_decommission);
+
+int sev_guest_df_flush(int *error)
+{
+ return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_df_flush);
+
+static void sev_exit(struct kref *ref)
+{
+ struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount);
+
+ misc_deregister(&misc_dev->misc);
+}
+
+static int sev_misc_init(struct psp_device *psp)
+{
+ struct device *dev = psp->dev;
+ int ret;
+
+ /*
+ * SEV feature support can be detected on multiple devices but the SEV
+ * FW commands must be issued on the master. During probe, we do not
+ * know the master hence we create /dev/sev on the first device probe.
+ * sev_do_cmd() finds the right master device to which to issue the
+ * command to the firmware.
+ */
+ if (!misc_dev) {
+ struct miscdevice *misc;
+
+ misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL);
+ if (!misc_dev)
+ return -ENOMEM;
+
+ misc = &misc_dev->misc;
+ misc->minor = MISC_DYNAMIC_MINOR;
+ misc->name = DEVICE_NAME;
+ misc->fops = &sev_fops;
+
+ ret = misc_register(misc);
+ if (ret)
+ return ret;
+
+ kref_init(&misc_dev->refcount);
+ } else {
+ kref_get(&misc_dev->refcount);
+ }
+
+ init_waitqueue_head(&psp->sev_int_queue);
+ psp->sev_misc = misc_dev;
+ dev_dbg(dev, "registered SEV device\n");
+
+ return 0;
+}
+
+static int sev_init(struct psp_device *psp)
+{
+ /* Check if device supports SEV feature */
+ if (!(ioread32(psp->io_regs + PSP_FEATURE_REG) & 1)) {
+ dev_dbg(psp->dev, "device does not support SEV\n");
+ return 1;
+ }
+
+ return sev_misc_init(psp);
+}
+
+int psp_dev_init(struct sp_device *sp)
+{
+ struct device *dev = sp->dev;
+ struct psp_device *psp;
+ int ret;
+
+ ret = -ENOMEM;
+ psp = psp_alloc_struct(sp);
+ if (!psp)
+ goto e_err;
+
+ sp->psp_data = psp;
+
+ psp->vdata = (struct psp_vdata *)sp->dev_vdata->psp_vdata;
+ if (!psp->vdata) {
+ ret = -ENODEV;
+ dev_err(dev, "missing driver data\n");
+ goto e_err;
+ }
+
+ psp->io_regs = sp->io_map + psp->vdata->offset;
+
+ /* Disable and clear interrupts until ready */
+ iowrite32(0, psp->io_regs + PSP_P2CMSG_INTEN);
+ iowrite32(-1, psp->io_regs + PSP_P2CMSG_INTSTS);
+
+ /* Request an irq */
+ ret = sp_request_psp_irq(psp->sp, psp_irq_handler, psp->name, psp);
+ if (ret) {
+ dev_err(dev, "psp: unable to allocate an IRQ\n");
+ goto e_err;
+ }
+
+ ret = sev_init(psp);
+ if (ret)
+ goto e_irq;
+
+ if (sp->set_psp_master_device)
+ sp->set_psp_master_device(sp);
+
+ /* Enable interrupt */
+ iowrite32(-1, psp->io_regs + PSP_P2CMSG_INTEN);
+
+ return 0;
+
+e_irq:
+ sp_free_psp_irq(psp->sp, psp);
+e_err:
+ sp->psp_data = NULL;
+
+ dev_notice(dev, "psp initialization failed\n");
+
+ return ret;
+}
+
+void psp_dev_destroy(struct sp_device *sp)
+{
+ struct psp_device *psp = sp->psp_data;
+
+ if (psp->sev_misc)
+ kref_put(&misc_dev->refcount, sev_exit);
+
+ sp_free_psp_irq(sp, psp);
+}
+
+int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
+ void *data, int *error)
+{
+ if (!filep || filep->f_op != &sev_fops)
+ return -EBADF;
+
+ return sev_do_cmd(cmd, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
+
+void psp_pci_init(void)
+{
+ struct sev_user_data_status *status;
+ struct sp_device *sp;
+ int error, rc;
+
+ sp = sp_get_psp_master_device();
+ if (!sp)
+ return;
+
+ psp_master = sp->psp_data;
+
+ /* Initialize the platform */
+ rc = sev_platform_init(&error);
+ if (rc) {
+ dev_err(sp->dev, "SEV: failed to INIT error %#x\n", error);
+ goto err;
+ }
+
+ /* Display SEV firmware version */
+ status = &psp_master->status_cmd_buf;
+ rc = sev_platform_status(status, &error);
+ if (rc) {
+ dev_err(sp->dev, "SEV: failed to get status error %#x\n", error);
+ goto err;
+ }
+
+ dev_info(sp->dev, "SEV API:%d.%d build:%d\n", status->api_major,
+ status->api_minor, status->build);
+ return;
+
+err:
+ psp_master = NULL;
+}
+
+void psp_pci_exit(void)
+{
+ if (!psp_master)
+ return;
+
+ sev_platform_shutdown(NULL);
+}
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
new file mode 100644
index 000000000000..c81f0b11287a
--- /dev/null
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -0,0 +1,83 @@
+/*
+ * AMD Platform Security Processor (PSP) interface driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PSP_DEV_H__
+#define __PSP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+#include <linux/psp-sev.h>
+#include <linux/miscdevice.h>
+
+#include "sp-dev.h"
+
+#define PSP_C2PMSG(_num) ((_num) << 2)
+#define PSP_CMDRESP PSP_C2PMSG(32)
+#define PSP_CMDBUFF_ADDR_LO PSP_C2PMSG(56)
+#define PSP_CMDBUFF_ADDR_HI PSP_C2PMSG(57)
+#define PSP_FEATURE_REG PSP_C2PMSG(63)
+
+#define PSP_P2CMSG(_num) ((_num) << 2)
+#define PSP_CMD_COMPLETE_REG 1
+#define PSP_CMD_COMPLETE PSP_P2CMSG(PSP_CMD_COMPLETE_REG)
+
+#define PSP_P2CMSG_INTEN 0x0110
+#define PSP_P2CMSG_INTSTS 0x0114
+
+#define PSP_C2PMSG_ATTR_0 0x0118
+#define PSP_C2PMSG_ATTR_1 0x011c
+#define PSP_C2PMSG_ATTR_2 0x0120
+#define PSP_C2PMSG_ATTR_3 0x0124
+#define PSP_P2CMSG_ATTR_0 0x0128
+
+#define PSP_CMDRESP_CMD_SHIFT 16
+#define PSP_CMDRESP_IOC BIT(0)
+#define PSP_CMDRESP_RESP BIT(31)
+#define PSP_CMDRESP_ERR_MASK 0xffff
+
+#define MAX_PSP_NAME_LEN 16
+
+struct sev_misc_dev {
+ struct kref refcount;
+ struct miscdevice misc;
+};
+
+struct psp_device {
+ struct list_head entry;
+
+ struct psp_vdata *vdata;
+ char name[MAX_PSP_NAME_LEN];
+
+ struct device *dev;
+ struct sp_device *sp;
+
+ void __iomem *io_regs;
+
+ int sev_state;
+ unsigned int sev_int_rcvd;
+ wait_queue_head_t sev_int_queue;
+ struct sev_misc_dev *sev_misc;
+ struct sev_user_data_status status_cmd_buf;
+ struct sev_data_init init_cmd_buf;
+};
+
+#endif /* __PSP_DEV_H */
diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
index bef387c8abfd..eb0da6572720 100644
--- a/drivers/crypto/ccp/sp-dev.c
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -198,6 +198,8 @@ int sp_init(struct sp_device *sp)
if (sp->dev_vdata->ccp_vdata)
ccp_dev_init(sp);
+ if (sp->dev_vdata->psp_vdata)
+ psp_dev_init(sp);
return 0;
}
@@ -206,6 +208,9 @@ void sp_destroy(struct sp_device *sp)
if (sp->dev_vdata->ccp_vdata)
ccp_dev_destroy(sp);
+ if (sp->dev_vdata->psp_vdata)
+ psp_dev_destroy(sp);
+
sp_del_device(sp);
}
@@ -237,6 +242,27 @@ int sp_resume(struct sp_device *sp)
}
#endif
+struct sp_device *sp_get_psp_master_device(void)
+{
+ struct sp_device *i, *ret = NULL;
+ unsigned long flags;
+
+ write_lock_irqsave(&sp_unit_lock, flags);
+ if (list_empty(&sp_units))
+ goto unlock;
+
+ list_for_each_entry(i, &sp_units, entry) {
+ if (i->psp_data)
+ break;
+ }
+
+ if (i->get_psp_master_device)
+ ret = i->get_psp_master_device();
+unlock:
+ write_unlock_irqrestore(&sp_unit_lock, flags);
+ return ret;
+}
+
static int __init sp_mod_init(void)
{
#ifdef CONFIG_X86
@@ -246,6 +272,10 @@ static int __init sp_mod_init(void)
if (ret)
return ret;
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+ psp_pci_init();
+#endif
+
return 0;
#endif
@@ -265,6 +295,11 @@ static int __init sp_mod_init(void)
static void __exit sp_mod_exit(void)
{
#ifdef CONFIG_X86
+
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+ psp_pci_exit();
+#endif
+
sp_pci_exit();
#endif
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 5ab486ade1ad..acb197b66ced 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -42,12 +42,17 @@ struct ccp_vdata {
const unsigned int offset;
const unsigned int rsamax;
};
+
+struct psp_vdata {
+ const unsigned int offset;
+};
+
/* Structure to hold SP device data */
struct sp_dev_vdata {
const unsigned int bar;
const struct ccp_vdata *ccp_vdata;
- void *psp_vdata;
+ const struct psp_vdata *psp_vdata;
};
struct sp_device {
@@ -68,6 +73,10 @@ struct sp_device {
/* DMA caching attribute support */
unsigned int axcache;
+ /* get and set master device */
+ struct sp_device*(*get_psp_master_device)(void);
+ void (*set_psp_master_device)(struct sp_device *);
+
bool irq_registered;
bool use_tasklet;
@@ -103,6 +112,7 @@ void sp_free_ccp_irq(struct sp_device *sp, void *data);
int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
const char *name, void *data);
void sp_free_psp_irq(struct sp_device *sp, void *data);
+struct sp_device *sp_get_psp_master_device(void);
#ifdef CONFIG_CRYPTO_DEV_SP_CCP
@@ -130,4 +140,20 @@ static inline int ccp_dev_resume(struct sp_device *sp)
}
#endif /* CONFIG_CRYPTO_DEV_SP_CCP */
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+
+int psp_dev_init(struct sp_device *sp);
+void psp_pci_init(void);
+void psp_dev_destroy(struct sp_device *sp);
+void psp_pci_exit(void);
+
+#else /* !CONFIG_CRYPTO_DEV_SP_PSP */
+
+static inline int psp_dev_init(struct sp_device *sp) { return 0; }
+static inline void psp_pci_init(void) { }
+static inline void psp_dev_destroy(struct sp_device *sp) { }
+static inline void psp_pci_exit(void) { }
+
+#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
+
#endif
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 9859aa683a28..f5f43c50698a 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -25,6 +25,7 @@
#include <linux/ccp.h>
#include "ccp-dev.h"
+#include "psp-dev.h"
#define MSIX_VECTORS 2
@@ -32,6 +33,7 @@ struct sp_pci {
int msix_count;
struct msix_entry msix_entry[MSIX_VECTORS];
};
+static struct sp_device *sp_dev_master;
static int sp_get_msix_irqs(struct sp_device *sp)
{
@@ -108,6 +110,45 @@ static void sp_free_irqs(struct sp_device *sp)
sp->psp_irq = 0;
}
+static bool sp_pci_is_master(struct sp_device *sp)
+{
+ struct device *dev_cur, *dev_new;
+ struct pci_dev *pdev_cur, *pdev_new;
+
+ dev_new = sp->dev;
+ dev_cur = sp_dev_master->dev;
+
+ pdev_new = to_pci_dev(dev_new);
+ pdev_cur = to_pci_dev(dev_cur);
+
+ if (pdev_new->bus->number < pdev_cur->bus->number)
+ return true;
+
+ if (PCI_SLOT(pdev_new->devfn) < PCI_SLOT(pdev_cur->devfn))
+ return true;
+
+ if (PCI_FUNC(pdev_new->devfn) < PCI_FUNC(pdev_cur->devfn))
+ return true;
+
+ return false;
+}
+
+static void psp_set_master(struct sp_device *sp)
+{
+ if (!sp_dev_master) {
+ sp_dev_master = sp;
+ return;
+ }
+
+ if (sp_pci_is_master(sp))
+ sp_dev_master = sp;
+}
+
+static struct sp_device *psp_get_master(void)
+{
+ return sp_dev_master;
+}
+
static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct sp_device *sp;
@@ -166,6 +207,8 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto e_err;
pci_set_master(pdev);
+ sp->set_psp_master_device = psp_set_master;
+ sp->get_psp_master_device = psp_get_master;
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
if (ret) {
@@ -225,6 +268,12 @@ static int sp_pci_resume(struct pci_dev *pdev)
}
#endif
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+static const struct psp_vdata psp_entry = {
+ .offset = 0x10500,
+};
+#endif
+
static const struct sp_dev_vdata dev_vdata[] = {
{
.bar = 2,
@@ -237,6 +286,9 @@ static const struct sp_dev_vdata dev_vdata[] = {
#ifdef CONFIG_CRYPTO_DEV_SP_CCP
.ccp_vdata = &ccpv5a,
#endif
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+ .psp_vdata = &psp_entry
+#endif
},
{
.bar = 2,
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 539450713838..d78d5fc173dc 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -135,10 +135,10 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
* Userspace can query the state of these implicitly tracked fences using poll()
* and related system calls:
*
- * - Checking for POLLIN, i.e. read access, can be use to query the state of the
+ * - Checking for EPOLLIN, i.e. read access, can be use to query the state of the
* most recent write or exclusive fence.
*
- * - Checking for POLLOUT, i.e. write access, can be used to query the state of
+ * - Checking for EPOLLOUT, i.e. write access, can be used to query the state of
* all attached fences, shared and exclusive ones.
*
* Note that this only signals the completion of the respective fences, i.e. the
@@ -168,13 +168,13 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
dmabuf = file->private_data;
if (!dmabuf || !dmabuf->resv)
- return POLLERR;
+ return EPOLLERR;
resv = dmabuf->resv;
poll_wait(file, &dmabuf->poll, poll);
- events = poll_requested_events(poll) & (POLLIN | POLLOUT);
+ events = poll_requested_events(poll) & (EPOLLIN | EPOLLOUT);
if (!events)
return 0;
@@ -193,12 +193,12 @@ retry:
goto retry;
}
- if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) {
+ if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
- __poll_t pevents = POLLIN;
+ __poll_t pevents = EPOLLIN;
if (shared_count == 0)
- pevents |= POLLOUT;
+ pevents |= EPOLLOUT;
spin_lock_irq(&dmabuf->poll.lock);
if (dcb->active) {
@@ -228,19 +228,19 @@ retry:
}
}
- if ((events & POLLOUT) && shared_count > 0) {
+ if ((events & EPOLLOUT) && shared_count > 0) {
struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared;
int i;
/* Only queue a new callback if no event has fired yet */
spin_lock_irq(&dmabuf->poll.lock);
if (dcb->active)
- events &= ~POLLOUT;
+ events &= ~EPOLLOUT;
else
- dcb->active = POLLOUT;
+ dcb->active = EPOLLOUT;
spin_unlock_irq(&dmabuf->poll.lock);
- if (!(events & POLLOUT))
+ if (!(events & EPOLLOUT))
goto out;
for (i = 0; i < shared_count; ++i) {
@@ -253,14 +253,14 @@ retry:
*
* call dma_buf_poll_cb and force a recheck!
*/
- events &= ~POLLOUT;
+ events &= ~EPOLLOUT;
dma_buf_poll_cb(NULL, &dcb->cb);
break;
}
if (!dma_fence_add_callback(fence, &dcb->cb,
dma_buf_poll_cb)) {
dma_fence_put(fence);
- events &= ~POLLOUT;
+ events &= ~EPOLLOUT;
break;
}
dma_fence_put(fence);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 8e8c4a12a0bc..35dd06479867 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -325,7 +325,7 @@ static __poll_t sync_file_poll(struct file *file, poll_table *wait)
wake_up_all(&sync_file->wq);
}
- return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
+ return dma_fence_is_signaled(sync_file->fence) ? EPOLLIN : 0;
}
static long sync_file_ioctl_merge(struct sync_file *sync_file,
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 523391bb3fbe..f0587273940e 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1792,9 +1792,9 @@ static __poll_t fw_device_op_poll(struct file *file, poll_table * pt)
poll_wait(file, &client->wait, pt);
if (fw_device_is_shutdown(client->device))
- mask |= POLLHUP | POLLERR;
+ mask |= EPOLLHUP | EPOLLERR;
if (!list_empty(&client->event_list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c
index fee2e9e7ea20..a128dd1126ae 100644
--- a/drivers/firewire/nosy.c
+++ b/drivers/firewire/nosy.c
@@ -337,10 +337,10 @@ nosy_poll(struct file *file, poll_table *pt)
poll_wait(file, &client->buffer.wait, pt);
if (atomic_read(&client->buffer.size) > 0)
- ret = POLLIN | POLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
if (list_empty(&client->lynx->link))
- ret |= POLLHUP;
+ ret |= EPOLLHUP;
return ret;
}
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 36ca5064486e..d66de67ef307 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -630,7 +630,7 @@ static __poll_t lineevent_poll(struct file *filep,
poll_wait(filep, &le->wait, wait);
if (!kfifo_is_empty(&le->events))
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
return events;
}
@@ -775,7 +775,7 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p)
ret = kfifo_put(&le->events, ge);
if (ret != 0)
- wake_up_poll(&le->wait, POLLIN);
+ wake_up_poll(&le->wait, EPOLLIN);
return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 9a17bd3639d1..e394799979a6 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -567,7 +567,7 @@ __poll_t drm_poll(struct file *filp, struct poll_table_struct *wait)
poll_wait(filp, &file_priv->event_wait, wait);
if (!list_empty(&file_priv->event_list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e42d9a4de322..0be50e43507d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -244,7 +244,7 @@
* The two separate pointers let us decouple read()s from tail pointer aging.
*
* The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering POLLIN events)
+ * callback (the same callback that is used for delivering EPOLLIN events)
*
* Initially the tails are marked invalid with %INVALID_TAIL_PTR which
* indicates that an updated tail pointer is needed.
@@ -2292,13 +2292,13 @@ static ssize_t i915_perf_read(struct file *file,
mutex_unlock(&dev_priv->perf.lock);
}
- /* We allow the poll checking to sometimes report false positive POLLIN
+ /* We allow the poll checking to sometimes report false positive EPOLLIN
* events where we might actually report EAGAIN on read() if there's
* not really any data available. In this situation though we don't
- * want to enter a busy loop between poll() reporting a POLLIN event
+ * want to enter a busy loop between poll() reporting a EPOLLIN event
* and read() returning -EAGAIN. Clearing the oa.pollin state here
* effectively ensures we back off until the next hrtimer callback
- * before reporting another POLLIN event.
+ * before reporting another EPOLLIN event.
*/
if (ret >= 0 || ret == -EAGAIN) {
/* Maybe make ->pollin per-stream state if we support multiple
@@ -2358,7 +2358,7 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
* samples to read.
*/
if (dev_priv->perf.oa.pollin)
- events |= POLLIN;
+ events |= EPOLLIN;
return events;
}
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index dfd8d0048980..1c5e74cb9279 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -1271,7 +1271,7 @@ static __poll_t vga_arb_fpoll(struct file *file, poll_table *wait)
pr_debug("%s\n", __func__);
poll_wait(file, &vga_wait_queue, wait);
- return POLLIN;
+ return EPOLLIN;
}
static int vga_arb_open(struct inode *inode, struct file *file)
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index c783fd5ef809..4f4e7a08a07b 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1185,9 +1185,9 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait)
poll_wait(file, &list->hdev->debug_wait, wait);
if (list->head != list->tail)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (!list->hdev->debug)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
return 0;
}
diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c
index b7e86aba6f33..5be8de70c651 100644
--- a/drivers/hid/hid-roccat.c
+++ b/drivers/hid/hid-roccat.c
@@ -142,9 +142,9 @@ static __poll_t roccat_poll(struct file *file, poll_table *wait)
struct roccat_reader *reader = file->private_data;
poll_wait(file, &reader->device->wait, wait);
if (reader->cbuf_start != reader->device->cbuf_end)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (!reader->device->exist)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
return 0;
}
diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index 21ed6c55c40a..e8a114157f87 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -714,7 +714,7 @@ static __poll_t hid_sensor_custom_poll(struct file *file,
poll_wait(file, &sensor_inst->wait, wait);
if (!kfifo_is_empty(&sensor_inst->data_fifo))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index be210219f982..fbfcc8009432 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -255,9 +255,9 @@ static __poll_t hidraw_poll(struct file *file, poll_table *wait)
poll_wait(file, &list->hidraw->wait, wait);
if (list->head != list->tail)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (!list->hidraw->exist)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
return 0;
}
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index fc43850a155e..4e0e7baf8513 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -760,7 +760,7 @@ static __poll_t uhid_char_poll(struct file *file, poll_table *wait)
poll_wait(file, &uhid->waitq, wait);
if (uhid->head != uhid->tail)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 0ff3e7e70c8d..e3ce233f8bdc 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -428,9 +428,9 @@ static __poll_t hiddev_poll(struct file *file, poll_table *wait)
poll_wait(file, &list->hiddev->wait, wait);
if (list->head != list->tail)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (!list->hiddev->exist)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
return 0;
}
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
index 8fbbacb0fe21..324cb8ec9405 100644
--- a/drivers/hsi/clients/cmt_speech.c
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -1132,9 +1132,9 @@ static __poll_t cs_char_poll(struct file *file, poll_table *wait)
poll_wait(file, &cs_char_data.wait, wait);
spin_lock_bh(&csdata->lock);
if (!list_empty(&csdata->chardev_queue))
- ret = POLLIN | POLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
else if (!list_empty(&csdata->dataind_queue))
- ret = POLLIN | POLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
spin_unlock_bh(&csdata->lock);
return ret;
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 047959e74bb1..832777527936 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -113,10 +113,10 @@ static __poll_t hvt_op_poll(struct file *file, poll_table *wait)
poll_wait(file, &hvt->outmsg_q, wait);
if (hvt->mode == HVUTIL_TRANSPORT_DESTROY)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
if (hvt->outmsg_len > 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c
index ff03324dee13..05e0c353e089 100644
--- a/drivers/iio/buffer/industrialio-buffer-dma.c
+++ b/drivers/iio/buffer/industrialio-buffer-dma.c
@@ -222,7 +222,7 @@ void iio_dma_buffer_block_done(struct iio_dma_buffer_block *block)
spin_unlock_irqrestore(&queue->list_lock, flags);
iio_buffer_block_put_atomic(block);
- wake_up_interruptible_poll(&queue->buffer.pollq, POLLIN | POLLRDNORM);
+ wake_up_interruptible_poll(&queue->buffer.pollq, EPOLLIN | EPOLLRDNORM);
}
EXPORT_SYMBOL_GPL(iio_dma_buffer_block_done);
@@ -251,7 +251,7 @@ void iio_dma_buffer_block_list_abort(struct iio_dma_buffer_queue *queue,
}
spin_unlock_irqrestore(&queue->list_lock, flags);
- wake_up_interruptible_poll(&queue->buffer.pollq, POLLIN | POLLRDNORM);
+ wake_up_interruptible_poll(&queue->buffer.pollq, EPOLLIN | EPOLLRDNORM);
}
EXPORT_SYMBOL_GPL(iio_dma_buffer_block_list_abort);
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 6184c100a94a..79abf70a126d 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -166,7 +166,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
* @wait: Poll table structure pointer for which the driver adds
* a wait queue
*
- * Return: (POLLIN | POLLRDNORM) if data is available for reading
+ * Return: (EPOLLIN | EPOLLRDNORM) if data is available for reading
* or 0 for other cases
*/
__poll_t iio_buffer_poll(struct file *filp,
@@ -180,7 +180,7 @@ __poll_t iio_buffer_poll(struct file *filp,
poll_wait(filp, &rb->pollq, wait);
if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
@@ -1396,7 +1396,7 @@ static int iio_push_to_buffer(struct iio_buffer *buffer, const void *data)
* We can't just test for watermark to decide if we wake the poll queue
* because read may request less samples than the watermark.
*/
- wake_up_interruptible_poll(&buffer->pollq, POLLIN | POLLRDNORM);
+ wake_up_interruptible_poll(&buffer->pollq, EPOLLIN | EPOLLRDNORM);
return 0;
}
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 0bcf073e46db..c6dfdf0aaac5 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -80,7 +80,7 @@ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp)
copied = kfifo_put(&ev_int->det_events, ev);
if (copied != 0)
- wake_up_poll(&ev_int->wait, POLLIN);
+ wake_up_poll(&ev_int->wait, EPOLLIN);
}
return 0;
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(iio_push_event);
* @filep: File structure pointer to identify the device
* @wait: Poll table pointer to add the wait queue on
*
- * Return: (POLLIN | POLLRDNORM) if data is available for reading
+ * Return: (EPOLLIN | EPOLLRDNORM) if data is available for reading
* or a negative error code on failure
*/
static __poll_t iio_event_poll(struct file *filep,
@@ -108,7 +108,7 @@ static __poll_t iio_event_poll(struct file *filep,
poll_wait(filep, &ev_int->wait, wait);
if (!kfifo_is_empty(&ev_int->det_events))
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
return events;
}
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 8ae636bb09e5..01702265c1e1 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1144,7 +1144,7 @@ static __poll_t ib_ucm_poll(struct file *filp,
poll_wait(filp, &file->poll_wait, wait);
if (!list_empty(&file->events))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 6ba4231f2b07..f015f1bf88c9 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1639,7 +1639,7 @@ static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
poll_wait(filp, &file->poll_wait, wait);
if (!list_empty(&file->event_list))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 78c77962422e..bb98c9e4a7fd 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -633,12 +633,12 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
struct ib_umad_file *file = filp->private_data;
/* we will always be able to post a MAD send */
- __poll_t mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
poll_wait(filp, &file->recv_wait, wait);
if (!list_empty(&file->recv_list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5b811bf574d6..395a3b091229 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -351,7 +351,7 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
- pollflags = POLLIN | POLLRDNORM;
+ pollflags = EPOLLIN | EPOLLRDNORM;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index d9a0f2590294..41fafebe3b0d 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -612,13 +612,13 @@ static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt)
uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
if (!uctxt)
- pollflag = POLLERR;
+ pollflag = EPOLLERR;
else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
pollflag = poll_urgent(fp, pt);
else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
pollflag = poll_next(fp, pt);
else /* invalid */
- pollflag = POLLERR;
+ pollflag = EPOLLERR;
return pollflag;
}
@@ -1435,7 +1435,7 @@ static __poll_t poll_urgent(struct file *fp,
spin_lock_irq(&dd->uctxt_lock);
if (uctxt->urgent != uctxt->urgent_poll) {
- pollflag = POLLIN | POLLRDNORM;
+ pollflag = EPOLLIN | EPOLLRDNORM;
uctxt->urgent_poll = uctxt->urgent;
} else {
pollflag = 0;
@@ -1462,7 +1462,7 @@ static __poll_t poll_next(struct file *fp,
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
pollflag = 0;
} else {
- pollflag = POLLIN | POLLRDNORM;
+ pollflag = EPOLLIN | EPOLLRDNORM;
}
spin_unlock_irq(&dd->uctxt_lock);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index f7593b5e2b76..52c29db3a2f4 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1085,7 +1085,7 @@ static __poll_t qib_poll_urgent(struct qib_ctxtdata *rcd,
spin_lock_irq(&dd->uctxt_lock);
if (rcd->urgent != rcd->urgent_poll) {
- pollflag = POLLIN | POLLRDNORM;
+ pollflag = EPOLLIN | EPOLLRDNORM;
rcd->urgent_poll = rcd->urgent;
} else {
pollflag = 0;
@@ -1111,7 +1111,7 @@ static __poll_t qib_poll_next(struct qib_ctxtdata *rcd,
dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_ENB, rcd->ctxt);
pollflag = 0;
} else
- pollflag = POLLIN | POLLRDNORM;
+ pollflag = EPOLLIN | EPOLLRDNORM;
spin_unlock_irq(&dd->uctxt_lock);
return pollflag;
@@ -1124,13 +1124,13 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
rcd = ctxt_fp(fp);
if (!rcd)
- pollflag = POLLERR;
+ pollflag = EPOLLERR;
else if (rcd->poll_type == QIB_POLL_TYPE_URGENT)
pollflag = qib_poll_urgent(rcd, fp, pt);
else if (rcd->poll_type == QIB_POLL_TYPE_ANYRCV)
pollflag = qib_poll_next(rcd, fp, pt);
else /* invalid */
- pollflag = POLLERR;
+ pollflag = EPOLLERR;
return pollflag;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index de318389a301..67de94343cb4 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -174,14 +174,13 @@ void usnic_transport_put_socket(struct socket *sock)
int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
uint32_t *addr, uint16_t *port)
{
- int len;
int err;
struct sockaddr_in sock_addr;
err = sock->ops->getname(sock,
(struct sockaddr *)&sock_addr,
- &len, 0);
- if (err)
+ 0);
+ if (err < 0)
return err;
if (sock_addr.sin_family != AF_INET)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 19624e023ebd..0336643c2ed6 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -874,7 +874,7 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
iser_info("iser conn %p rc = %d\n", iser_conn, rc);
if (rc > 0)
- return 1; /* success, this is the equivalent of POLLOUT */
+ return 1; /* success, this is the equivalent of EPOLLOUT */
else if (!rc)
return 0; /* timeout */
else
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 94049fdc583c..c81c79d01d93 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -650,12 +650,12 @@ static __poll_t evdev_poll(struct file *file, poll_table *wait)
poll_wait(file, &evdev->wait, wait);
if (evdev->exist && !client->revoked)
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
else
- mask = POLLHUP | POLLERR;
+ mask = EPOLLHUP | EPOLLERR;
if (client->packet_head != client->tail)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 0d0b2ab1bb6b..9785546420a7 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1053,7 +1053,7 @@ static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait)
poll_wait(file, &input_devices_poll_wait, wait);
if (file->f_version != input_devices_state) {
file->f_version = input_devices_state;
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index fe3255572886..4c1e427dfabb 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -442,8 +442,8 @@ static __poll_t joydev_poll(struct file *file, poll_table *wait)
struct joydev *joydev = client->joydev;
poll_wait(file, &joydev->wait, wait);
- return (joydev_data_pending(client) ? (POLLIN | POLLRDNORM) : 0) |
- (joydev->exist ? 0 : (POLLHUP | POLLERR));
+ return (joydev_data_pending(client) ? (EPOLLIN | EPOLLRDNORM) : 0) |
+ (joydev->exist ? 0 : (EPOLLHUP | EPOLLERR));
}
static int joydev_handle_JSIOCSAXMAP(struct joydev *joydev,
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 9c3f7ec3bd3d..49b34de0aed4 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -414,7 +414,7 @@ static __poll_t hp_sdc_rtc_poll(struct file *file, poll_table *wait)
l = 0;
if (l != 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index f640c591ef23..96a887f33698 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -704,7 +704,7 @@ static __poll_t uinput_poll(struct file *file, poll_table *wait)
poll_wait(file, &udev->waitq, wait);
if (udev->head != udev->tail)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index 731d84ae5101..e08228061bcd 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -765,9 +765,9 @@ static __poll_t mousedev_poll(struct file *file, poll_table *wait)
poll_wait(file, &mousedev->wait, wait);
- mask = mousedev->exist ? POLLOUT | POLLWRNORM : POLLHUP | POLLERR;
+ mask = mousedev->exist ? EPOLLOUT | EPOLLWRNORM : EPOLLHUP | EPOLLERR;
if (client->ready || client->buffer)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c
index fccf55a380b2..17b7fbecd9fe 100644
--- a/drivers/input/serio/serio_raw.c
+++ b/drivers/input/serio/serio_raw.c
@@ -247,9 +247,9 @@ static __poll_t serio_raw_poll(struct file *file, poll_table *wait)
poll_wait(file, &serio_raw->wait, wait);
- mask = serio_raw->dead ? POLLHUP | POLLERR : POLLOUT | POLLWRNORM;
+ mask = serio_raw->dead ? EPOLLHUP | EPOLLERR : EPOLLOUT | EPOLLWRNORM;
if (serio_raw->head != serio_raw->tail)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/input/serio/userio.c b/drivers/input/serio/userio.c
index a63de06b08bc..9ab5c45c3a9f 100644
--- a/drivers/input/serio/userio.c
+++ b/drivers/input/serio/userio.c
@@ -255,7 +255,7 @@ static __poll_t userio_char_poll(struct file *file, poll_table *wait)
poll_wait(file, &userio->waitq, wait);
if (userio->head != userio->tail)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index e268811dc544..19cd93783c87 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -731,12 +731,12 @@ capi_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
if (!cdev->ap.applid)
- return POLLERR;
+ return EPOLLERR;
poll_wait(file, &(cdev->recvwait), wait);
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
if (!skb_queue_empty(&cdev->recvqueue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c
index 34b7704042a4..342585e04fd3 100644
--- a/drivers/isdn/divert/divert_procfs.c
+++ b/drivers/isdn/divert/divert_procfs.c
@@ -125,9 +125,9 @@ isdn_divert_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
poll_wait(file, &(rd_queue), wait);
- /* mask = POLLOUT | POLLWRNORM; */
+ /* mask = EPOLLOUT | EPOLLWRNORM; */
if (*((struct divert_info **) file->private_data)) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
return mask;
} /* isdn_divert_poll */
diff --git a/drivers/isdn/hardware/eicon/divamnt.c b/drivers/isdn/hardware/eicon/divamnt.c
index 70f16102a001..5a95587b3117 100644
--- a/drivers/isdn/hardware/eicon/divamnt.c
+++ b/drivers/isdn/hardware/eicon/divamnt.c
@@ -103,9 +103,9 @@ static __poll_t maint_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
poll_wait(file, &msgwaitq, wait);
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
if (file->private_data || diva_dbg_q_length()) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
return (mask);
}
diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c
index da5cc5ab7e2d..525518c945fe 100644
--- a/drivers/isdn/hardware/eicon/divasi.c
+++ b/drivers/isdn/hardware/eicon/divasi.c
@@ -370,31 +370,31 @@ static __poll_t um_idi_poll(struct file *file, poll_table *wait)
diva_um_idi_os_context_t *p_os;
if (!file->private_data) {
- return (POLLERR);
+ return (EPOLLERR);
}
if ((!(p_os =
(diva_um_idi_os_context_t *)
diva_um_id_get_os_context(file->private_data)))
|| p_os->aborted) {
- return (POLLERR);
+ return (EPOLLERR);
}
poll_wait(file, &p_os->read_wait, wait);
if (p_os->aborted) {
- return (POLLERR);
+ return (EPOLLERR);
}
switch (diva_user_mode_idi_ind_ready(file->private_data, file)) {
case (-1):
- return (POLLERR);
+ return (EPOLLERR);
case 0:
return (0);
}
- return (POLLIN | POLLRDNORM);
+ return (EPOLLIN | EPOLLRDNORM);
}
static int um_idi_open(struct inode *inode, struct file *file)
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index fbc788e6f0db..b9980e84f9db 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -653,9 +653,9 @@ static ssize_t divas_read(struct file *file, char __user *buf,
static __poll_t divas_poll(struct file *file, poll_table *wait)
{
if (!file->private_data) {
- return (POLLERR);
+ return (EPOLLERR);
}
- return (POLLIN | POLLRDNORM);
+ return (EPOLLIN | EPOLLRDNORM);
}
static const struct file_operations divas_fops = {
diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c
index 3478f6f099eb..f52f4622b10b 100644
--- a/drivers/isdn/hardware/eicon/divasproc.c
+++ b/drivers/isdn/hardware/eicon/divasproc.c
@@ -101,7 +101,7 @@ divas_write(struct file *file, const char __user *buf, size_t count, loff_t *off
static __poll_t divas_poll(struct file *file, poll_table *wait)
{
- return (POLLERR);
+ return (EPOLLERR);
}
static int divas_open(struct inode *inode, struct file *file)
diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c
index 6abea6915f49..6e898b90e86e 100644
--- a/drivers/isdn/hysdn/hysdn_proclog.c
+++ b/drivers/isdn/hysdn/hysdn_proclog.c
@@ -294,7 +294,7 @@ hysdn_log_poll(struct file *file, poll_table *wait)
poll_wait(file, &(pd->rd_queue), wait);
if (*((struct log_data **) file->private_data))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
} /* hysdn_log_poll */
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 0521c32949d4..7c6f3f5d9d9a 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1237,22 +1237,22 @@ isdn_poll(struct file *file, poll_table *wait)
mutex_lock(&isdn_mutex);
if (minor == ISDN_MINOR_STATUS) {
poll_wait(file, &(dev->info_waitq), wait);
- /* mask = POLLOUT | POLLWRNORM; */
+ /* mask = EPOLLOUT | EPOLLWRNORM; */
if (file->private_data) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
goto out;
}
if (minor >= ISDN_MINOR_CTRL && minor <= ISDN_MINOR_CTRLMAX) {
if (drvidx < 0) {
/* driver deregistered while file open */
- mask = POLLHUP;
+ mask = EPOLLHUP;
goto out;
}
poll_wait(file, &(dev->drv[drvidx]->st_waitq), wait);
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
if (dev->drv[drvidx]->stavail) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
goto out;
}
@@ -1262,7 +1262,7 @@ isdn_poll(struct file *file, poll_table *wait)
goto out;
}
#endif
- mask = POLLERR;
+ mask = EPOLLERR;
out:
mutex_unlock(&isdn_mutex);
return mask;
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 57884319b4b1..a7b275ea5de1 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -704,12 +704,12 @@ isdn_ppp_poll(struct file *file, poll_table *wait)
if (!(is->state & IPPP_OPEN)) {
if (is->state == IPPP_CLOSEWAIT)
- return POLLHUP;
+ return EPOLLHUP;
printk(KERN_DEBUG "isdn_ppp: device not open\n");
- return POLLERR;
+ return EPOLLERR;
}
/* we're always ready to send .. */
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
spin_lock_irqsave(&is->buflock, flags);
bl = is->last;
@@ -719,7 +719,7 @@ isdn_ppp_poll(struct file *file, poll_table *wait)
*/
if (bf->next != bl || (is->state & IPPP_NOBLOCK)) {
is->state &= ~IPPP_NOBLOCK;
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
spin_unlock_irqrestore(&is->buflock, flags);
return mask;
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index c5603d1a07d6..1f8f489b4167 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -560,7 +560,7 @@ done:
static int
data_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
struct sock *sk = sock->sk;
@@ -570,14 +570,13 @@ data_sock_getname(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
- *addr_len = sizeof(*maddr);
maddr->family = AF_ISDN;
maddr->dev = _pms(sk)->dev->id;
maddr->channel = _pms(sk)->ch.nr;
maddr->sapi = _pms(sk)->ch.addr & 0xff;
maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
release_sock(sk);
- return 0;
+ return sizeof(*maddr);
}
static const struct proto_ops data_sock_ops = {
diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c
index f4272d4e0a26..211ed6cffd10 100644
--- a/drivers/isdn/mISDN/timerdev.c
+++ b/drivers/isdn/mISDN/timerdev.c
@@ -145,7 +145,7 @@ static __poll_t
mISDN_poll(struct file *filep, poll_table *wait)
{
struct mISDNtimerdev *dev = filep->private_data;
- __poll_t mask = POLLERR;
+ __poll_t mask = EPOLLERR;
if (*debug & DEBUG_TIMER)
printk(KERN_DEBUG "%s(%p, %p)\n", __func__, filep, wait);
@@ -153,7 +153,7 @@ mISDN_poll(struct file *filep, poll_table *wait)
poll_wait(filep, &dev->wait, wait);
mask = 0;
if (dev->work || !list_empty(&dev->expired))
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
if (*debug & DEBUG_TIMER)
printk(KERN_DEBUG "%s work(%d) empty(%d)\n", __func__,
dev->work, list_empty(&dev->expired));
diff --git a/drivers/leds/uleds.c b/drivers/leds/uleds.c
index 5beacab05ed7..0c43bfac9598 100644
--- a/drivers/leds/uleds.c
+++ b/drivers/leds/uleds.c
@@ -183,7 +183,7 @@ static __poll_t uleds_poll(struct file *file, poll_table *wait)
poll_wait(file, &udev->waitq, wait);
if (udev->new_data)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 346e6f5f77be..e8ae2e54151c 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -1259,7 +1259,7 @@ static __poll_t smu_fpoll(struct file *file, poll_table *wait)
spin_lock_irqsave(&pp->lock, flags);
if (pp->busy && pp->cmd.status != 1)
- mask |= POLLIN;
+ mask |= EPOLLIN;
spin_unlock_irqrestore(&pp->lock, flags);
}
if (pp->mode == smu_file_events) {
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 08849e33c567..94c0f3f7df69 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -2169,7 +2169,7 @@ pmu_fpoll(struct file *filp, poll_table *wait)
poll_wait(filp, &pp->wait, wait);
spin_lock_irqsave(&pp->lock, flags);
if (pp->rb_get != pp->rb_put)
- mask |= POLLIN;
+ mask |= EPOLLIN;
spin_unlock_irqrestore(&pp->lock, flags);
return mask;
}
diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c
index f84730d63b1f..58bfafc34bc4 100644
--- a/drivers/mailbox/mailbox-test.c
+++ b/drivers/mailbox/mailbox-test.c
@@ -243,7 +243,7 @@ mbox_test_message_poll(struct file *filp, struct poll_table_struct *wait)
poll_wait(filp, &tdev->waitq, wait);
if (mbox_test_message_data_ready(tdev))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6cc6c0f9c3a9..458e1d38577d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,8 +287,10 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
- if (kthread_should_stop()) \
+ if (kthread_should_stop()) { \
+ set_current_state(TASK_RUNNING); \
return 0; \
+ } \
\
schedule(); \
mutex_lock(&(ca)->set->bucket_lock); \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5e2d4e80198e..12e5197f186c 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -658,10 +658,15 @@ struct cache_set {
atomic_long_t writeback_keys_done;
atomic_long_t writeback_keys_failed;
+ atomic_long_t reclaim;
+ atomic_long_t flush_write;
+ atomic_long_t retry_flush_write;
+
enum {
ON_ERROR_UNREGISTER,
ON_ERROR_PANIC,
} on_error;
+#define DEFAULT_IO_ERROR_LIMIT 8
unsigned error_limit;
unsigned error_decay;
@@ -675,6 +680,8 @@ struct cache_set {
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
+
+ DECLARE_HEAP(struct btree *, flush_btree);
};
struct bbio {
@@ -917,7 +924,7 @@ void bcache_write_super(struct cache_set *);
int bch_flash_dev_create(struct cache_set *c, uint64_t size);
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
void bch_cached_dev_detach(struct cached_dev *);
void bch_cached_dev_run(struct cached_dev *);
void bcache_device_stop(struct bcache_device *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index bf3a48aa9a9a..fad9fe8817eb 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1869,14 +1869,17 @@ void bch_initial_gc_finish(struct cache_set *c)
*/
for_each_cache(ca, c, i) {
for_each_bucket(b, ca) {
- if (fifo_full(&ca->free[RESERVE_PRIO]))
+ if (fifo_full(&ca->free[RESERVE_PRIO]) &&
+ fifo_full(&ca->free[RESERVE_BTREE]))
break;
if (bch_can_invalidate_bucket(ca, b) &&
!GC_MARK(b)) {
__bch_invalidate_one_bucket(ca, b);
- fifo_push(&ca->free[RESERVE_PRIO],
- b - ca->buckets);
+ if (!fifo_push(&ca->free[RESERVE_PRIO],
+ b - ca->buckets))
+ fifo_push(&ca->free[RESERVE_BTREE],
+ b - ca->buckets);
}
}
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index a87165c1d8e5..1b736b860739 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -368,6 +368,12 @@ err:
}
/* Journalling */
+#define journal_max_cmp(l, r) \
+ (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
+ fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
+#define journal_min_cmp(l, r) \
+ (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
+ fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
static void btree_flush_write(struct cache_set *c)
{
@@ -375,28 +381,41 @@ static void btree_flush_write(struct cache_set *c)
* Try to find the btree node with that references the oldest journal
* entry, best is our current candidate and is locked if non NULL:
*/
- struct btree *b, *best;
- unsigned i;
+ struct btree *b;
+ int i;
+
+ atomic_long_inc(&c->flush_write);
+
retry:
- best = NULL;
-
- for_each_cached_btree(b, c, i)
- if (btree_current_write(b)->journal) {
- if (!best)
- best = b;
- else if (journal_pin_cmp(c,
- btree_current_write(best)->journal,
- btree_current_write(b)->journal)) {
- best = b;
+ spin_lock(&c->journal.lock);
+ if (heap_empty(&c->flush_btree)) {
+ for_each_cached_btree(b, c, i)
+ if (btree_current_write(b)->journal) {
+ if (!heap_full(&c->flush_btree))
+ heap_add(&c->flush_btree, b,
+ journal_max_cmp);
+ else if (journal_max_cmp(b,
+ heap_peek(&c->flush_btree))) {
+ c->flush_btree.data[0] = b;
+ heap_sift(&c->flush_btree, 0,
+ journal_max_cmp);
+ }
}
- }
- b = best;
+ for (i = c->flush_btree.used / 2 - 1; i >= 0; --i)
+ heap_sift(&c->flush_btree, i, journal_min_cmp);
+ }
+
+ b = NULL;
+ heap_pop(&c->flush_btree, b, journal_min_cmp);
+ spin_unlock(&c->journal.lock);
+
if (b) {
mutex_lock(&b->write_lock);
if (!btree_current_write(b)->journal) {
mutex_unlock(&b->write_lock);
/* We raced */
+ atomic_long_inc(&c->retry_flush_write);
goto retry;
}
@@ -476,6 +495,8 @@ static void journal_reclaim(struct cache_set *c)
unsigned iter, n = 0;
atomic_t p;
+ atomic_long_inc(&c->reclaim);
+
while (!atomic_read(&fifo_front(&c->journal.pin)))
fifo_pop(&c->journal.pin, p);
@@ -819,7 +840,8 @@ int bch_journal_alloc(struct cache_set *c)
j->w[0].c = c;
j->w[1].c = c;
- if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+ if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
+ !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
!(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
!(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
return -ENOMEM;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 133b81225ea9..312895788036 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -957,7 +957,8 @@ void bch_cached_dev_detach(struct cached_dev *dc)
cached_dev_put(dc);
}
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+ uint8_t *set_uuid)
{
uint32_t rtime = cpu_to_le32(get_seconds());
struct uuid_entry *u;
@@ -965,7 +966,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
bdevname(dc->bdev, buf);
- if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))
+ if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
+ (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
return -ENOENT;
if (dc->disk.c) {
@@ -1194,7 +1196,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
list_add(&dc->list, &uncached_devices);
list_for_each_entry(c, &bch_cache_sets, list)
- bch_cached_dev_attach(dc, c);
+ bch_cached_dev_attach(dc, c, NULL);
if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
@@ -1553,7 +1555,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
- c->error_limit = 8 << IO_ERROR_SHIFT;
+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
return c;
err:
@@ -1716,7 +1718,7 @@ static void run_cache_set(struct cache_set *c)
bcache_write_super(c);
list_for_each_entry_safe(dc, t, &uncached_devices, list)
- bch_cached_dev_attach(dc, c);
+ bch_cached_dev_attach(dc, c, NULL);
flash_devs_run(c);
@@ -1833,6 +1835,7 @@ void bch_cache_release(struct kobject *kobj)
static int cache_alloc(struct cache *ca)
{
size_t free;
+ size_t btree_buckets;
struct bucket *b;
__module_get(THIS_MODULE);
@@ -1840,9 +1843,19 @@ static int cache_alloc(struct cache *ca)
bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
+ /*
+ * when ca->sb.njournal_buckets is not zero, journal exists,
+ * and in bch_journal_replay(), tree node may split,
+ * so bucket of RESERVE_BTREE type is needed,
+ * the worst situation is all journal buckets are valid journal,
+ * and all the keys need to replay,
+ * so the number of RESERVE_BTREE type buckets should be as much
+ * as journal buckets
+ */
+ btree_buckets = ca->sb.njournal_buckets ?: 8;
free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
- if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+ if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index b4184092c727..78cd7bd50fdd 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -65,6 +65,9 @@ read_attribute(bset_tree_stats);
read_attribute(state);
read_attribute(cache_read_races);
+read_attribute(reclaim);
+read_attribute(flush_write);
+read_attribute(retry_flush_write);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
@@ -195,7 +198,7 @@ STORE(__cached_dev)
{
struct cached_dev *dc = container_of(kobj, struct cached_dev,
disk.kobj);
- ssize_t v = size;
+ ssize_t v;
struct cache_set *c;
struct kobj_uevent_env *env;
@@ -215,7 +218,9 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_rate,
dc->writeback_rate.rate, 1, INT_MAX);
- d_strtoul_nonzero(writeback_rate_update_seconds);
+ sysfs_strtoul_clamp(writeback_rate_update_seconds,
+ dc->writeback_rate_update_seconds,
+ 1, WRITEBACK_RATE_UPDATE_SECS_MAX);
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
@@ -267,17 +272,20 @@ STORE(__cached_dev)
}
if (attr == &sysfs_attach) {
- if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16)
+ uint8_t set_uuid[16];
+
+ if (bch_parse_uuid(buf, set_uuid) < 16)
return -EINVAL;
+ v = -ENOENT;
list_for_each_entry(c, &bch_cache_sets, list) {
- v = bch_cached_dev_attach(dc, c);
+ v = bch_cached_dev_attach(dc, c, set_uuid);
if (!v)
return size;
}
pr_err("Can't attach %s: cache set not found", buf);
- size = v;
+ return v;
}
if (attr == &sysfs_detach && dc->disk.c)
@@ -545,6 +553,15 @@ SHOW(__bch_cache_set)
sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races));
+ sysfs_print(reclaim,
+ atomic_long_read(&c->reclaim));
+
+ sysfs_print(flush_write,
+ atomic_long_read(&c->flush_write));
+
+ sysfs_print(retry_flush_write,
+ atomic_long_read(&c->retry_flush_write));
+
sysfs_print(writeback_keys_done,
atomic_long_read(&c->writeback_keys_done));
sysfs_print(writeback_keys_failed,
@@ -556,7 +573,7 @@ SHOW(__bch_cache_set)
/* See count_io_errors for why 88 */
sysfs_print(io_error_halflife, c->error_decay * 88);
- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
+ sysfs_print(io_error_limit, c->error_limit);
sysfs_hprint(congested,
((uint64_t) bch_get_congested(c)) << 9);
@@ -656,7 +673,7 @@ STORE(__bch_cache_set)
}
if (attr == &sysfs_io_error_limit)
- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
+ c->error_limit = strtoul_or_return(buf);
/* See count_io_errors() for why 88 */
if (attr == &sysfs_io_error_halflife)
@@ -731,6 +748,9 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_bset_tree_stats,
&sysfs_cache_read_races,
+ &sysfs_reclaim,
+ &sysfs_flush_write,
+ &sysfs_retry_flush_write,
&sysfs_writeback_keys_done,
&sysfs_writeback_keys_failed,
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 4df4c5c1cab2..a6763db7f061 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -112,6 +112,8 @@ do { \
#define heap_full(h) ((h)->used == (h)->size)
+#define heap_empty(h) ((h)->used == 0)
+
#define DECLARE_FIFO(type, name) \
struct { \
size_t front, back, size, mask; \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 51306a19ab03..f1d2fc15abcc 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
while (!kthread_should_stop()) {
down_write(&dc->writeback_lock);
+ set_current_state(TASK_INTERRUPTIBLE);
if (!atomic_read(&dc->has_dirty) ||
(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
!dc->writeback_running)) {
up_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop())
+ if (kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
return 0;
+ }
schedule();
continue;
}
+ set_current_state(TASK_RUNNING);
searched_full_index = refill_dirty(dc);
@@ -652,7 +655,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate.rate = 1024;
dc->writeback_rate_minimum = 8;
- dc->writeback_rate_update_seconds = 5;
+ dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
dc->writeback_rate_p_term_inverse = 40;
dc->writeback_rate_i_term_inverse = 10000;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 66f1c527fa24..587b25599856 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -8,6 +8,9 @@
#define MAX_WRITEBACKS_IN_PASS 5
#define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
+#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
+
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 3f6791afd3e4..a89fd8f44453 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1937,7 +1937,7 @@ static __poll_t dm_poll(struct file *filp, poll_table *wait)
poll_wait(filp, &dm_global_eventq, wait);
if ((int)(atomic_read(&dm_global_event_nr) - priv->global_event_nr) > 0)
- mask |= POLLIN;
+ mask |= EPOLLIN;
return mask;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0081ace39a64..bc67ab6844f0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7891,14 +7891,14 @@ static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
__poll_t mask;
if (md_unloading)
- return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
+ return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
poll_wait(filp, &md_event_waiters, wait);
/* always allow read */
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
if (seq->poll_event != atomic_read(&md_event_count))
- mask |= POLLERR | POLLPRI;
+ mask |= EPOLLERR | EPOLLPRI;
return mask;
}
diff --git a/drivers/media/cec/cec-api.c b/drivers/media/cec/cec-api.c
index ecc89d9a279b..492db12b8c4d 100644
--- a/drivers/media/cec/cec-api.c
+++ b/drivers/media/cec/cec-api.c
@@ -51,15 +51,15 @@ static __poll_t cec_poll(struct file *filp,
__poll_t res = 0;
if (!cec_is_registered(adap))
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
mutex_lock(&adap->lock);
if (adap->is_configured &&
adap->transmit_queue_sz < CEC_MAX_MSG_TX_QUEUE_SZ)
- res |= POLLOUT | POLLWRNORM;
+ res |= EPOLLOUT | EPOLLWRNORM;
if (fh->queued_msgs)
- res |= POLLIN | POLLRDNORM;
+ res |= EPOLLIN | EPOLLRDNORM;
if (fh->total_queued_events)
- res |= POLLPRI;
+ res |= EPOLLPRI;
poll_wait(filp, &fh->wait, poll);
mutex_unlock(&adap->lock);
return res;
diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c
index 8ee3eebef4db..d4987fd05d05 100644
--- a/drivers/media/common/saa7146/saa7146_fops.c
+++ b/drivers/media/common/saa7146/saa7146_fops.c
@@ -332,7 +332,7 @@ static __poll_t __fops_poll(struct file *file, struct poll_table_struct *wait)
if (vdev->vfl_type == VFL_TYPE_VBI) {
if (fh->dev->ext_vv_data->capabilities & V4L2_CAP_SLICED_VBI_OUTPUT)
- return res | POLLOUT | POLLWRNORM;
+ return res | EPOLLOUT | EPOLLWRNORM;
if( 0 == fh->vbi_q.streaming )
return res | videobuf_poll_stream(file, &fh->vbi_q, wait);
q = &fh->vbi_q;
@@ -346,13 +346,13 @@ static __poll_t __fops_poll(struct file *file, struct poll_table_struct *wait)
if (!buf) {
DEB_D("buf == NULL!\n");
- return res | POLLERR;
+ return res | EPOLLERR;
}
poll_wait(file, &buf->done, wait);
if (buf->state == VIDEOBUF_DONE || buf->state == VIDEOBUF_ERROR) {
DEB_D("poll succeeded!\n");
- return res | POLLIN | POLLRDNORM;
+ return res | EPOLLIN | EPOLLRDNORM;
}
DEB_D("nothing to poll for, buf->state:%d\n", buf->state);
diff --git a/drivers/media/common/siano/smsdvb-debugfs.c b/drivers/media/common/siano/smsdvb-debugfs.c
index 403645fe9079..40891f4f842b 100644
--- a/drivers/media/common/siano/smsdvb-debugfs.c
+++ b/drivers/media/common/siano/smsdvb-debugfs.c
@@ -371,7 +371,7 @@ static __poll_t smsdvb_stats_poll(struct file *file, poll_table *wait)
rc = smsdvb_stats_wait_read(debug_data);
kref_put(&debug_data->refcount, smsdvb_debugfs_data_release);
- return rc > 0 ? POLLIN | POLLRDNORM : 0;
+ return rc > 0 ? EPOLLIN | EPOLLRDNORM : 0;
}
static ssize_t smsdvb_stats_read(struct file *file, char __user *user_buf,
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index 9a84c7092714..debe35fc66b4 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -2038,9 +2038,9 @@ __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file,
struct vb2_buffer *vb = NULL;
unsigned long flags;
- if (!q->is_output && !(req_events & (POLLIN | POLLRDNORM)))
+ if (!q->is_output && !(req_events & (EPOLLIN | EPOLLRDNORM)))
return 0;
- if (q->is_output && !(req_events & (POLLOUT | POLLWRNORM)))
+ if (q->is_output && !(req_events & (EPOLLOUT | EPOLLWRNORM)))
return 0;
/*
@@ -2048,18 +2048,18 @@ __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file,
*/
if (q->num_buffers == 0 && !vb2_fileio_is_active(q)) {
if (!q->is_output && (q->io_modes & VB2_READ) &&
- (req_events & (POLLIN | POLLRDNORM))) {
+ (req_events & (EPOLLIN | EPOLLRDNORM))) {
if (__vb2_init_fileio(q, 1))
- return POLLERR;
+ return EPOLLERR;
}
if (q->is_output && (q->io_modes & VB2_WRITE) &&
- (req_events & (POLLOUT | POLLWRNORM))) {
+ (req_events & (EPOLLOUT | EPOLLWRNORM))) {
if (__vb2_init_fileio(q, 0))
- return POLLERR;
+ return EPOLLERR;
/*
* Write to OUTPUT queue can be done immediately.
*/
- return POLLOUT | POLLWRNORM;
+ return EPOLLOUT | EPOLLWRNORM;
}
}
@@ -2068,24 +2068,24 @@ __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file,
* error flag is set.
*/
if (!vb2_is_streaming(q) || q->error)
- return POLLERR;
+ return EPOLLERR;
/*
* If this quirk is set and QBUF hasn't been called yet then
- * return POLLERR as well. This only affects capture queues, output
+ * return EPOLLERR as well. This only affects capture queues, output
* queues will always initialize waiting_for_buffers to false.
* This quirk is set by V4L2 for backwards compatibility reasons.
*/
if (q->quirk_poll_must_check_waiting_for_buffers &&
- q->waiting_for_buffers && (req_events & (POLLIN | POLLRDNORM)))
- return POLLERR;
+ q->waiting_for_buffers && (req_events & (EPOLLIN | EPOLLRDNORM)))
+ return EPOLLERR;
/*
* For output streams you can call write() as long as there are fewer
* buffers queued than there are buffers available.
*/
if (q->is_output && q->fileio && q->queued_count < q->num_buffers)
- return POLLOUT | POLLWRNORM;
+ return EPOLLOUT | EPOLLWRNORM;
if (list_empty(&q->done_list)) {
/*
@@ -2093,7 +2093,7 @@ __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file,
* return immediately. DQBUF will return -EPIPE.
*/
if (q->last_buffer_dequeued)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
poll_wait(file, &q->done_wq, wait);
}
@@ -2110,8 +2110,8 @@ __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file,
if (vb && (vb->state == VB2_BUF_STATE_DONE
|| vb->state == VB2_BUF_STATE_ERROR)) {
return (q->is_output) ?
- POLLOUT | POLLWRNORM :
- POLLIN | POLLRDNORM;
+ EPOLLOUT | EPOLLWRNORM :
+ EPOLLIN | EPOLLRDNORM;
}
return 0;
}
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index d9a487aab99c..886a2d8d5c6c 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -658,7 +658,7 @@ int vb2_queue_init(struct vb2_queue *q)
== V4L2_BUF_FLAG_TIMESTAMP_COPY;
/*
* For compatibility with vb1: if QBUF hasn't been called yet, then
- * return POLLERR as well. This only affects capture queues, output
+ * return EPOLLERR as well. This only affects capture queues, output
* queues will always initialize waiting_for_buffers to false.
*/
q->quirk_poll_must_check_waiting_for_buffers = true;
@@ -683,8 +683,8 @@ __poll_t vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait)
struct v4l2_fh *fh = file->private_data;
if (v4l2_event_pending(fh))
- res = POLLPRI;
- else if (req_events & POLLPRI)
+ res = EPOLLPRI;
+ else if (req_events & EPOLLPRI)
poll_wait(file, &fh->wait, wait);
}
@@ -921,7 +921,7 @@ __poll_t vb2_fop_poll(struct file *file, poll_table *wait)
WARN_ON(!lock);
if (lock && mutex_lock_interruptible(lock))
- return POLLERR;
+ return EPOLLERR;
fileio = q->fileio;
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index bc198f84b9cd..6d53af00190e 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -1179,7 +1179,7 @@ static __poll_t dvb_demux_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
if ((!dmxdevfilter) || dmxdevfilter->dev->exit)
- return POLLERR;
+ return EPOLLERR;
if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx))
return dvb_vb2_poll(&dmxdevfilter->vb2_ctx, file, wait);
@@ -1191,10 +1191,10 @@ static __poll_t dvb_demux_poll(struct file *file, poll_table *wait)
return 0;
if (dmxdevfilter->buffer.error)
- mask |= (POLLIN | POLLRDNORM | POLLPRI | POLLERR);
+ mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR);
if (!dvb_ringbuffer_empty(&dmxdevfilter->buffer))
- mask |= (POLLIN | POLLRDNORM | POLLPRI);
+ mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI);
return mask;
}
@@ -1331,7 +1331,7 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
dprintk("%s\n", __func__);
if (dmxdev->exit)
- return POLLERR;
+ return EPOLLERR;
if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx))
return dvb_vb2_poll(&dmxdev->dvr_vb2_ctx, file, wait);
@@ -1343,12 +1343,12 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
#endif
if (need_ringbuffer) {
if (dmxdev->dvr_buffer.error)
- mask |= (POLLIN | POLLRDNORM | POLLPRI | POLLERR);
+ mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR);
if (!dvb_ringbuffer_empty(&dmxdev->dvr_buffer))
- mask |= (POLLIN | POLLRDNORM | POLLPRI);
+ mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI);
} else
- mask |= (POLLOUT | POLLWRNORM | POLLPRI);
+ mask |= (EPOLLOUT | EPOLLWRNORM | EPOLLPRI);
return mask;
}
diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c
index b462ebc0c544..204d0f6c678d 100644
--- a/drivers/media/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb-core/dvb_ca_en50221.c
@@ -1796,7 +1796,7 @@ static __poll_t dvb_ca_en50221_io_poll(struct file *file, poll_table *wait)
dprintk("%s\n", __func__);
if (dvb_ca_en50221_io_read_condition(ca, &result, &slot) == 1)
- mask |= POLLIN;
+ mask |= EPOLLIN;
/* if there is something, return now */
if (mask)
@@ -1806,7 +1806,7 @@ static __poll_t dvb_ca_en50221_io_poll(struct file *file, poll_table *wait)
poll_wait(file, &ca->wait_queue, wait);
if (dvb_ca_en50221_io_read_condition(ca, &result, &slot) == 1)
- mask |= POLLIN;
+ mask |= EPOLLIN;
return mask;
}
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 87fc1bcae5ae..a7ed16e0841d 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -2646,7 +2646,7 @@ static __poll_t dvb_frontend_poll(struct file *file, struct poll_table_struct *w
poll_wait (file, &fepriv->events.wait_queue, wait);
if (fepriv->events.eventw != fepriv->events.eventr)
- return (POLLIN | POLLRDNORM | POLLPRI);
+ return (EPOLLIN | EPOLLRDNORM | EPOLLPRI);
return 0;
}
diff --git a/drivers/media/firewire/firedtv-ci.c b/drivers/media/firewire/firedtv-ci.c
index b4ddfff74267..8dc5a7495abe 100644
--- a/drivers/media/firewire/firedtv-ci.c
+++ b/drivers/media/firewire/firedtv-ci.c
@@ -209,7 +209,7 @@ static int fdtv_ca_ioctl(struct file *file, unsigned int cmd, void *arg)
static __poll_t fdtv_ca_io_poll(struct file *file, poll_table *wait)
{
- return POLLIN;
+ return EPOLLIN;
}
static const struct file_operations fdtv_ca_fops = {
diff --git a/drivers/media/i2c/saa6588.c b/drivers/media/i2c/saa6588.c
index 00640233a5e3..c3089bd34df2 100644
--- a/drivers/media/i2c/saa6588.c
+++ b/drivers/media/i2c/saa6588.c
@@ -413,7 +413,7 @@ static long saa6588_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
case SAA6588_CMD_POLL:
a->result = 0;
if (s->data_available_for_read)
- a->result |= POLLIN | POLLRDNORM;
+ a->result |= EPOLLIN | EPOLLRDNORM;
poll_wait(a->instance, &s->read_queue, a->event_list);
break;
diff --git a/drivers/media/media-devnode.c b/drivers/media/media-devnode.c
index 3049b1f505e5..67ac51eff15c 100644
--- a/drivers/media/media-devnode.c
+++ b/drivers/media/media-devnode.c
@@ -105,7 +105,7 @@ static __poll_t media_poll(struct file *filp,
struct media_devnode *devnode = media_devnode_data(filp);
if (!media_devnode_is_registered(devnode))
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
if (!devnode->fops->poll)
return DEFAULT_POLLMASK;
return devnode->fops->poll(filp, poll);
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index c988669e22ff..f697698fe38d 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -2964,39 +2964,39 @@ static __poll_t bttv_poll(struct file *file, poll_table *wait)
__poll_t req_events = poll_requested_events(wait);
if (v4l2_event_pending(&fh->fh))
- rc = POLLPRI;
- else if (req_events & POLLPRI)
+ rc = EPOLLPRI;
+ else if (req_events & EPOLLPRI)
poll_wait(file, &fh->fh.wait, wait);
- if (!(req_events & (POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLIN | EPOLLRDNORM)))
return rc;
if (V4L2_BUF_TYPE_VBI_CAPTURE == fh->type) {
if (!check_alloc_btres_lock(fh->btv,fh,RESOURCE_VBI))
- return rc | POLLERR;
+ return rc | EPOLLERR;
return rc | videobuf_poll_stream(file, &fh->vbi, wait);
}
if (check_btres(fh,RESOURCE_VIDEO_STREAM)) {
/* streaming capture */
if (list_empty(&fh->cap.stream))
- return rc | POLLERR;
+ return rc | EPOLLERR;
buf = list_entry(fh->cap.stream.next,struct bttv_buffer,vb.stream);
} else {
/* read() capture */
if (NULL == fh->cap.read_buf) {
/* need to capture a new frame */
if (locked_btres(fh->btv,RESOURCE_VIDEO_STREAM))
- return rc | POLLERR;
+ return rc | EPOLLERR;
fh->cap.read_buf = videobuf_sg_alloc(fh->cap.msize);
if (NULL == fh->cap.read_buf)
- return rc | POLLERR;
+ return rc | EPOLLERR;
fh->cap.read_buf->memory = V4L2_MEMORY_USERPTR;
field = videobuf_next_field(&fh->cap);
if (0 != fh->cap.ops->buf_prepare(&fh->cap,fh->cap.read_buf,field)) {
kfree (fh->cap.read_buf);
fh->cap.read_buf = NULL;
- return rc | POLLERR;
+ return rc | EPOLLERR;
}
fh->cap.ops->buf_queue(&fh->cap,fh->cap.read_buf);
fh->cap.read_off = 0;
@@ -3007,7 +3007,7 @@ static __poll_t bttv_poll(struct file *file, poll_table *wait)
poll_wait(file, &buf->vb.done, wait);
if (buf->vb.state == VIDEOBUF_DONE ||
buf->vb.state == VIDEOBUF_ERROR)
- rc = rc | POLLIN|POLLRDNORM;
+ rc = rc | EPOLLIN|EPOLLRDNORM;
return rc;
}
@@ -3338,8 +3338,8 @@ static __poll_t radio_poll(struct file *file, poll_table *wait)
__poll_t res = 0;
if (v4l2_event_pending(&fh->fh))
- res = POLLPRI;
- else if (req_events & POLLPRI)
+ res = EPOLLPRI;
+ else if (req_events & EPOLLPRI)
poll_wait(file, &fh->fh.wait, wait);
radio_enable(btv);
cmd.instance = file;
diff --git a/drivers/media/pci/cx18/cx18-fileops.c b/drivers/media/pci/cx18/cx18-fileops.c
index a8dbb922ba4b..a3f44e30f821 100644
--- a/drivers/media/pci/cx18/cx18-fileops.c
+++ b/drivers/media/pci/cx18/cx18-fileops.c
@@ -613,7 +613,7 @@ __poll_t cx18_v4l2_enc_poll(struct file *filp, poll_table *wait)
/* Start a capture if there is none */
if (!eof && !test_bit(CX18_F_S_STREAMING, &s->s_flags) &&
- (req_events & (POLLIN | POLLRDNORM))) {
+ (req_events & (EPOLLIN | EPOLLRDNORM))) {
int rc;
mutex_lock(&cx->serialize_lock);
@@ -622,7 +622,7 @@ __poll_t cx18_v4l2_enc_poll(struct file *filp, poll_table *wait)
if (rc) {
CX18_DEBUG_INFO("Could not start capture for %s (%d)\n",
s->name, rc);
- return POLLERR;
+ return EPOLLERR;
}
CX18_DEBUG_FILE("Encoder poll started capture\n");
}
@@ -632,23 +632,23 @@ __poll_t cx18_v4l2_enc_poll(struct file *filp, poll_table *wait)
__poll_t videobuf_poll = videobuf_poll_stream(filp, &s->vbuf_q, wait);
if (v4l2_event_pending(&id->fh))
- res |= POLLPRI;
- if (eof && videobuf_poll == POLLERR)
- return res | POLLHUP;
+ res |= EPOLLPRI;
+ if (eof && videobuf_poll == EPOLLERR)
+ return res | EPOLLHUP;
return res | videobuf_poll;
}
/* add stream's waitq to the poll list */
CX18_DEBUG_HI_FILE("Encoder poll\n");
if (v4l2_event_pending(&id->fh))
- res |= POLLPRI;
+ res |= EPOLLPRI;
else
poll_wait(filp, &s->waitq, wait);
if (atomic_read(&s->q_full.depth))
- return res | POLLIN | POLLRDNORM;
+ return res | EPOLLIN | EPOLLRDNORM;
if (eof)
- return res | POLLHUP;
+ return res | EPOLLHUP;
return res;
}
diff --git a/drivers/media/pci/ddbridge/ddbridge-core.c b/drivers/media/pci/ddbridge/ddbridge-core.c
index 42b42824382c..f9bee36f1cad 100644
--- a/drivers/media/pci/ddbridge/ddbridge-core.c
+++ b/drivers/media/pci/ddbridge/ddbridge-core.c
@@ -745,9 +745,9 @@ static __poll_t ts_poll(struct file *file, poll_table *wait)
poll_wait(file, &input->dma->wq, wait);
poll_wait(file, &output->dma->wq, wait);
if (ddb_input_avail(input) >= 188)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (ddb_output_free(output) >= 188)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/drivers/media/pci/ivtv/ivtv-fileops.c b/drivers/media/pci/ivtv/ivtv-fileops.c
index 4aa773507201..6196daae4b3e 100644
--- a/drivers/media/pci/ivtv/ivtv-fileops.c
+++ b/drivers/media/pci/ivtv/ivtv-fileops.c
@@ -747,7 +747,7 @@ __poll_t ivtv_v4l2_dec_poll(struct file *filp, poll_table *wait)
/* Turn off the old-style vsync events */
clear_bit(IVTV_F_I_EV_VSYNC_ENABLED, &itv->i_flags);
if (v4l2_event_pending(&id->fh))
- res = POLLPRI;
+ res = EPOLLPRI;
} else {
/* This is the old-style API which is here only for backwards
compatibility. */
@@ -755,12 +755,12 @@ __poll_t ivtv_v4l2_dec_poll(struct file *filp, poll_table *wait)
set_bit(IVTV_F_I_EV_VSYNC_ENABLED, &itv->i_flags);
if (test_bit(IVTV_F_I_EV_VSYNC, &itv->i_flags) ||
test_bit(IVTV_F_I_EV_DEC_STOPPED, &itv->i_flags))
- res = POLLPRI;
+ res = EPOLLPRI;
}
/* Allow write if buffers are available for writing */
if (s->q_free.buffers)
- res |= POLLOUT | POLLWRNORM;
+ res |= EPOLLOUT | EPOLLWRNORM;
return res;
}
@@ -776,7 +776,7 @@ __poll_t ivtv_v4l2_enc_poll(struct file *filp, poll_table *wait)
/* Start a capture if there is none */
if (!eof && !test_bit(IVTV_F_S_STREAMING, &s->s_flags) &&
s->type != IVTV_ENC_STREAM_TYPE_RAD &&
- (req_events & (POLLIN | POLLRDNORM))) {
+ (req_events & (EPOLLIN | EPOLLRDNORM))) {
int rc;
mutex_lock(&itv->serialize_lock);
@@ -785,7 +785,7 @@ __poll_t ivtv_v4l2_enc_poll(struct file *filp, poll_table *wait)
if (rc) {
IVTV_DEBUG_INFO("Could not start capture for %s (%d)\n",
s->name, rc);
- return POLLERR;
+ return EPOLLERR;
}
IVTV_DEBUG_FILE("Encoder poll started capture\n");
}
@@ -794,14 +794,14 @@ __poll_t ivtv_v4l2_enc_poll(struct file *filp, poll_table *wait)
IVTV_DEBUG_HI_FILE("Encoder poll\n");
poll_wait(filp, &s->waitq, wait);
if (v4l2_event_pending(&id->fh))
- res |= POLLPRI;
+ res |= EPOLLPRI;
else
poll_wait(filp, &id->fh.wait, wait);
if (s->q_full.length || s->q_io.length)
- return res | POLLIN | POLLRDNORM;
+ return res | EPOLLIN | EPOLLRDNORM;
if (eof)
- return res | POLLHUP;
+ return res | EPOLLHUP;
return res;
}
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index ae83293723ba..dedcdb573427 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -1430,7 +1430,7 @@ static __poll_t meye_poll(struct file *file, poll_table *wait)
mutex_lock(&meye.lock);
poll_wait(file, &meye.proc_list, wait);
if (kfifo_len(&meye.doneq))
- res |= POLLIN | POLLRDNORM;
+ res |= EPOLLIN | EPOLLRDNORM;
mutex_unlock(&meye.lock);
return res;
}
diff --git a/drivers/media/pci/saa7164/saa7164-encoder.c b/drivers/media/pci/saa7164/saa7164-encoder.c
index e7b31a5b14fd..32136ebe4f61 100644
--- a/drivers/media/pci/saa7164/saa7164-encoder.c
+++ b/drivers/media/pci/saa7164/saa7164-encoder.c
@@ -925,13 +925,13 @@ static __poll_t fops_poll(struct file *file, poll_table *wait)
saa7164_histogram_update(&port->poll_interval,
port->last_poll_msecs_diff);
- if (!(req_events & (POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLIN | EPOLLRDNORM)))
return mask;
if (atomic_cmpxchg(&fh->v4l_reading, 0, 1) == 0) {
if (atomic_inc_return(&port->v4l_reader_count) == 1) {
if (saa7164_encoder_initialize(port) < 0)
- return mask | POLLERR;
+ return mask | EPOLLERR;
saa7164_encoder_start_streaming(port);
msleep(200);
}
@@ -939,7 +939,7 @@ static __poll_t fops_poll(struct file *file, poll_table *wait)
/* Pull the first buffer from the used list */
if (!list_empty(&port->list_buf_used.list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/media/pci/saa7164/saa7164-vbi.c b/drivers/media/pci/saa7164/saa7164-vbi.c
index 6f97c8f2e00d..64ab91c24c18 100644
--- a/drivers/media/pci/saa7164/saa7164-vbi.c
+++ b/drivers/media/pci/saa7164/saa7164-vbi.c
@@ -650,7 +650,7 @@ static __poll_t fops_poll(struct file *file, poll_table *wait)
/* Pull the first buffer from the used list */
if (!list_empty(&port->list_buf_used.list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/media/pci/ttpci/av7110_av.c b/drivers/media/pci/ttpci/av7110_av.c
index 4d10e2f979d2..4daba76ec240 100644
--- a/drivers/media/pci/ttpci/av7110_av.c
+++ b/drivers/media/pci/ttpci/av7110_av.c
@@ -951,15 +951,15 @@ static __poll_t dvb_video_poll(struct file *file, poll_table *wait)
poll_wait(file, &av7110->video_events.wait_queue, wait);
if (av7110->video_events.eventw != av7110->video_events.eventr)
- mask = POLLPRI;
+ mask = EPOLLPRI;
if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
if (av7110->playing) {
if (FREE_COND)
- mask |= (POLLOUT | POLLWRNORM);
+ mask |= (EPOLLOUT | EPOLLWRNORM);
} else {
/* if not playing: may play if asked for */
- mask |= (POLLOUT | POLLWRNORM);
+ mask |= (EPOLLOUT | EPOLLWRNORM);
}
}
@@ -1001,9 +1001,9 @@ static __poll_t dvb_audio_poll(struct file *file, poll_table *wait)
if (av7110->playing) {
if (dvb_ringbuffer_free(&av7110->aout) >= 20 * 1024)
- mask |= (POLLOUT | POLLWRNORM);
+ mask |= (EPOLLOUT | EPOLLWRNORM);
} else /* if not playing: may play if asked for */
- mask = (POLLOUT | POLLWRNORM);
+ mask = (EPOLLOUT | EPOLLWRNORM);
return mask;
}
diff --git a/drivers/media/pci/ttpci/av7110_ca.c b/drivers/media/pci/ttpci/av7110_ca.c
index 96ca227cf51b..d8c2f1b34d74 100644
--- a/drivers/media/pci/ttpci/av7110_ca.c
+++ b/drivers/media/pci/ttpci/av7110_ca.c
@@ -237,10 +237,10 @@ static __poll_t dvb_ca_poll (struct file *file, poll_table *wait)
poll_wait(file, &wbuf->queue, wait);
if (!dvb_ringbuffer_empty(rbuf))
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
if (dvb_ringbuffer_free(wbuf) > 1024)
- mask |= (POLLOUT | POLLWRNORM);
+ mask |= (EPOLLOUT | EPOLLWRNORM);
return mask;
}
diff --git a/drivers/media/pci/zoran/zoran_driver.c b/drivers/media/pci/zoran/zoran_driver.c
index c464dae0389c..8d4e7d930a66 100644
--- a/drivers/media/pci/zoran/zoran_driver.c
+++ b/drivers/media/pci/zoran/zoran_driver.c
@@ -2513,10 +2513,10 @@ zoran_poll (struct file *file,
/* we should check whether buffers are ready to be synced on
* (w/o waits - O_NONBLOCK) here
- * if ready for read (sync), return POLLIN|POLLRDNORM,
- * if ready for write (sync), return POLLOUT|POLLWRNORM,
- * if error, return POLLERR,
- * if no buffers queued or so, return POLLNVAL
+ * if ready for read (sync), return EPOLLIN|EPOLLRDNORM,
+ * if ready for write (sync), return EPOLLOUT|EPOLLWRNORM,
+ * if error, return EPOLLERR,
+ * if no buffers queued or so, return EPOLLNVAL
*/
switch (fh->map_mode) {
@@ -2536,7 +2536,7 @@ zoran_poll (struct file *file,
if (fh->buffers.active != ZORAN_FREE &&
/* Buffer ready to DQBUF? */
zr->v4l_buffers.buffer[frame].state == BUZ_STATE_DONE)
- res |= POLLIN | POLLRDNORM;
+ res |= EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&zr->spinlock, flags);
break;
@@ -2557,9 +2557,9 @@ zoran_poll (struct file *file,
if (fh->buffers.active != ZORAN_FREE &&
zr->jpg_buffers.buffer[frame].state == BUZ_STATE_DONE) {
if (fh->map_mode == ZORAN_MAP_MODE_JPG_REC)
- res |= POLLIN | POLLRDNORM;
+ res |= EPOLLIN | EPOLLRDNORM;
else
- res |= POLLOUT | POLLWRNORM;
+ res |= EPOLLOUT | EPOLLWRNORM;
}
spin_unlock_irqrestore(&zr->spinlock, flags);
@@ -2570,7 +2570,7 @@ zoran_poll (struct file *file,
KERN_ERR
"%s: %s - internal error, unknown map_mode=%d\n",
ZR_DEVNAME(zr), __func__, fh->map_mode);
- res |= POLLERR;
+ res |= EPOLLERR;
}
return res;
diff --git a/drivers/media/platform/fsl-viu.c b/drivers/media/platform/fsl-viu.c
index de285a269390..200c47c69a75 100644
--- a/drivers/media/platform/fsl-viu.c
+++ b/drivers/media/platform/fsl-viu.c
@@ -1272,9 +1272,9 @@ static __poll_t viu_poll(struct file *file, struct poll_table_struct *wait)
__poll_t res = v4l2_ctrl_poll(file, wait);
if (V4L2_BUF_TYPE_VIDEO_CAPTURE != fh->type)
- return POLLERR;
+ return EPOLLERR;
- if (!(req_events & (POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLIN | EPOLLRDNORM)))
return res;
mutex_lock(&dev->lock);
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c
index f15cf24c1c63..d5b94fc0040e 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c
@@ -1008,7 +1008,7 @@ static __poll_t s5p_mfc_poll(struct file *file,
*/
if ((!src_q->streaming || list_empty(&src_q->queued_list))
&& (!dst_q->streaming || list_empty(&dst_q->queued_list))) {
- rc = POLLERR;
+ rc = EPOLLERR;
goto end;
}
mutex_unlock(&dev->mfc_mutex);
@@ -1017,14 +1017,14 @@ static __poll_t s5p_mfc_poll(struct file *file,
poll_wait(file, &dst_q->done_wq, wait);
mutex_lock(&dev->mfc_mutex);
if (v4l2_event_pending(&ctx->fh))
- rc |= POLLPRI;
+ rc |= EPOLLPRI;
spin_lock_irqsave(&src_q->done_lock, flags);
if (!list_empty(&src_q->done_list))
src_vb = list_first_entry(&src_q->done_list, struct vb2_buffer,
done_entry);
if (src_vb && (src_vb->state == VB2_BUF_STATE_DONE
|| src_vb->state == VB2_BUF_STATE_ERROR))
- rc |= POLLOUT | POLLWRNORM;
+ rc |= EPOLLOUT | EPOLLWRNORM;
spin_unlock_irqrestore(&src_q->done_lock, flags);
spin_lock_irqsave(&dst_q->done_lock, flags);
if (!list_empty(&dst_q->done_list))
@@ -1032,7 +1032,7 @@ static __poll_t s5p_mfc_poll(struct file *file,
done_entry);
if (dst_vb && (dst_vb->state == VB2_BUF_STATE_DONE
|| dst_vb->state == VB2_BUF_STATE_ERROR))
- rc |= POLLIN | POLLRDNORM;
+ rc |= EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&dst_q->done_lock, flags);
end:
mutex_unlock(&dev->mfc_mutex);
diff --git a/drivers/media/platform/soc_camera/soc_camera.c b/drivers/media/platform/soc_camera/soc_camera.c
index 70fc5f01942d..c86dd2fdab84 100644
--- a/drivers/media/platform/soc_camera/soc_camera.c
+++ b/drivers/media/platform/soc_camera/soc_camera.c
@@ -809,10 +809,10 @@ static __poll_t soc_camera_poll(struct file *file, poll_table *pt)
{
struct soc_camera_device *icd = file->private_data;
struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
- __poll_t res = POLLERR;
+ __poll_t res = EPOLLERR;
if (icd->streamer != file)
- return POLLERR;
+ return EPOLLERR;
mutex_lock(&ici->host_lock);
res = ici->ops->poll(file, pt);
diff --git a/drivers/media/platform/vivid/vivid-radio-rx.c b/drivers/media/platform/vivid/vivid-radio-rx.c
index fcb7a9f015b6..f834f7df8cf9 100644
--- a/drivers/media/platform/vivid/vivid-radio-rx.c
+++ b/drivers/media/platform/vivid/vivid-radio-rx.c
@@ -142,7 +142,7 @@ retry:
__poll_t vivid_radio_rx_poll(struct file *file, struct poll_table_struct *wait)
{
- return POLLIN | POLLRDNORM | v4l2_ctrl_poll(file, wait);
+ return EPOLLIN | EPOLLRDNORM | v4l2_ctrl_poll(file, wait);
}
int vivid_radio_rx_enum_freq_bands(struct file *file, void *fh, struct v4l2_frequency_band *band)
diff --git a/drivers/media/platform/vivid/vivid-radio-tx.c b/drivers/media/platform/vivid/vivid-radio-tx.c
index af4907a197a3..308b13f85dc0 100644
--- a/drivers/media/platform/vivid/vivid-radio-tx.c
+++ b/drivers/media/platform/vivid/vivid-radio-tx.c
@@ -105,7 +105,7 @@ retry:
__poll_t vivid_radio_tx_poll(struct file *file, struct poll_table_struct *wait)
{
- return POLLOUT | POLLWRNORM | v4l2_ctrl_poll(file, wait);
+ return EPOLLOUT | EPOLLWRNORM | v4l2_ctrl_poll(file, wait);
}
int vidioc_g_modulator(struct file *file, void *fh, struct v4l2_modulator *a)
diff --git a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c
index af7c68b344d1..5b82e63885cd 100644
--- a/drivers/media/radio/radio-cadet.c
+++ b/drivers/media/radio/radio-cadet.c
@@ -488,14 +488,14 @@ static __poll_t cadet_poll(struct file *file, struct poll_table_struct *wait)
__poll_t res = v4l2_ctrl_poll(file, wait);
poll_wait(file, &dev->read_queue, wait);
- if (dev->rdsstat == 0 && (req_events & (POLLIN | POLLRDNORM))) {
+ if (dev->rdsstat == 0 && (req_events & (EPOLLIN | EPOLLRDNORM))) {
mutex_lock(&dev->lock);
if (dev->rdsstat == 0)
cadet_start_rds(dev);
mutex_unlock(&dev->lock);
}
if (cadet_has_rds_data(dev))
- res |= POLLIN | POLLRDNORM;
+ res |= EPOLLIN | EPOLLRDNORM;
return res;
}
diff --git a/drivers/media/radio/radio-si476x.c b/drivers/media/radio/radio-si476x.c
index bff9789ae9bc..b52e678c6901 100644
--- a/drivers/media/radio/radio-si476x.c
+++ b/drivers/media/radio/radio-si476x.c
@@ -1158,15 +1158,15 @@ static __poll_t si476x_radio_fops_poll(struct file *file,
__poll_t req_events = poll_requested_events(pts);
__poll_t err = v4l2_ctrl_poll(file, pts);
- if (req_events & (POLLIN | POLLRDNORM)) {
+ if (req_events & (EPOLLIN | EPOLLRDNORM)) {
if (atomic_read(&radio->core->is_alive))
poll_wait(file, &radio->core->rds_read_queue, pts);
if (!atomic_read(&radio->core->is_alive))
- err = POLLHUP;
+ err = EPOLLHUP;
if (!kfifo_is_empty(&radio->core->rds_fifo))
- err = POLLIN | POLLRDNORM;
+ err = EPOLLIN | EPOLLRDNORM;
}
return err;
diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c
index f92b0f9241a9..58e944591602 100644
--- a/drivers/media/radio/radio-wl1273.c
+++ b/drivers/media/radio/radio-wl1273.c
@@ -1104,10 +1104,10 @@ static __poll_t wl1273_fm_fops_poll(struct file *file,
poll_wait(file, &radio->read_queue, pts);
if (radio->rd_index != radio->wr_index)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
} else if (core->mode == WL1273_MODE_TX) {
- return POLLOUT | POLLWRNORM;
+ return EPOLLOUT | EPOLLWRNORM;
}
return 0;
diff --git a/drivers/media/radio/si470x/radio-si470x-common.c b/drivers/media/radio/si470x/radio-si470x-common.c
index 5b477b7d6a66..e0054e0f410d 100644
--- a/drivers/media/radio/si470x/radio-si470x-common.c
+++ b/drivers/media/radio/si470x/radio-si470x-common.c
@@ -514,7 +514,7 @@ static __poll_t si470x_fops_poll(struct file *file,
__poll_t req_events = poll_requested_events(pts);
__poll_t retval = v4l2_ctrl_poll(file, pts);
- if (req_events & (POLLIN | POLLRDNORM)) {
+ if (req_events & (EPOLLIN | EPOLLRDNORM)) {
/* switch on rds reception */
if ((radio->registers[SYSCONFIG1] & SYSCONFIG1_RDS) == 0)
si470x_rds_on(radio);
@@ -522,7 +522,7 @@ static __poll_t si470x_fops_poll(struct file *file,
poll_wait(file, &radio->read_queue, pts);
if (radio->rd_index != radio->wr_index)
- retval |= POLLIN | POLLRDNORM;
+ retval |= EPOLLIN | EPOLLRDNORM;
}
return retval;
diff --git a/drivers/media/radio/wl128x/fmdrv_v4l2.c b/drivers/media/radio/wl128x/fmdrv_v4l2.c
index fd603c1b96bb..dccdf6558e6a 100644
--- a/drivers/media/radio/wl128x/fmdrv_v4l2.c
+++ b/drivers/media/radio/wl128x/fmdrv_v4l2.c
@@ -112,7 +112,7 @@ static __poll_t fm_v4l2_fops_poll(struct file *file, struct poll_table_struct *p
ret = fmc_is_rds_data_available(fmdev, file, pts);
mutex_unlock(&fmdev->mutex);
if (ret < 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index b3544988586e..cc863044c880 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -109,7 +109,7 @@ void ir_lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev)
if (LIRC_IS_TIMEOUT(sample) && !fh->send_timeout_reports)
continue;
if (kfifo_put(&fh->rawir, sample))
- wake_up_poll(&fh->wait_poll, POLLIN | POLLRDNORM);
+ wake_up_poll(&fh->wait_poll, EPOLLIN | EPOLLRDNORM);
}
spin_unlock_irqrestore(&dev->lirc_fh_lock, flags);
}
@@ -130,7 +130,7 @@ void ir_lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc)
spin_lock_irqsave(&dev->lirc_fh_lock, flags);
list_for_each_entry(fh, &dev->lirc_fh, list) {
if (kfifo_put(&fh->scancodes, *lsc))
- wake_up_poll(&fh->wait_poll, POLLIN | POLLRDNORM);
+ wake_up_poll(&fh->wait_poll, EPOLLIN | EPOLLRDNORM);
}
spin_unlock_irqrestore(&dev->lirc_fh_lock, flags);
}
@@ -603,15 +603,15 @@ static __poll_t ir_lirc_poll(struct file *file, struct poll_table_struct *wait)
poll_wait(file, &fh->wait_poll, wait);
if (!rcdev->registered) {
- events = POLLHUP | POLLERR;
+ events = EPOLLHUP | EPOLLERR;
} else if (rcdev->driver_type != RC_DRIVER_IR_RAW_TX) {
if (fh->rec_mode == LIRC_MODE_SCANCODE &&
!kfifo_is_empty(&fh->scancodes))
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
if (fh->rec_mode == LIRC_MODE_MODE2 &&
!kfifo_is_empty(&fh->rawir))
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
}
return events;
@@ -779,7 +779,7 @@ void ir_lirc_unregister(struct rc_dev *dev)
spin_lock_irqsave(&dev->lirc_fh_lock, flags);
list_for_each_entry(fh, &dev->lirc_fh, list)
- wake_up_poll(&fh->wait_poll, POLLHUP | POLLERR);
+ wake_up_poll(&fh->wait_poll, EPOLLHUP | EPOLLERR);
spin_unlock_irqrestore(&dev->lirc_fh_lock, flags);
cdev_device_del(&dev->lirc_cdev, &dev->lirc_dev);
diff --git a/drivers/media/usb/cpia2/cpia2_core.c b/drivers/media/usb/cpia2/cpia2_core.c
index e7524920c618..3dfbb545c0e3 100644
--- a/drivers/media/usb/cpia2/cpia2_core.c
+++ b/drivers/media/usb/cpia2/cpia2_core.c
@@ -2375,7 +2375,7 @@ __poll_t cpia2_poll(struct camera_data *cam, struct file *filp,
{
__poll_t status = v4l2_ctrl_poll(filp, wait);
- if ((poll_requested_events(wait) & (POLLIN | POLLRDNORM)) &&
+ if ((poll_requested_events(wait) & (EPOLLIN | EPOLLRDNORM)) &&
!cam->streaming) {
/* Start streaming */
cpia2_usb_stream_start(cam,
@@ -2385,7 +2385,7 @@ __poll_t cpia2_poll(struct camera_data *cam, struct file *filp,
poll_wait(filp, &cam->wq_stream, wait);
if (cam->curbuff->status == FRAME_READY)
- status |= POLLIN | POLLRDNORM;
+ status |= EPOLLIN | EPOLLRDNORM;
return status;
}
diff --git a/drivers/media/usb/cx231xx/cx231xx-417.c b/drivers/media/usb/cx231xx/cx231xx-417.c
index 103e3299b77f..b80e6857e2eb 100644
--- a/drivers/media/usb/cx231xx/cx231xx-417.c
+++ b/drivers/media/usb/cx231xx/cx231xx-417.c
@@ -1821,11 +1821,11 @@ static __poll_t mpeg_poll(struct file *file,
__poll_t res = 0;
if (v4l2_event_pending(&fh->fh))
- res |= POLLPRI;
+ res |= EPOLLPRI;
else
poll_wait(file, &fh->fh.wait, wait);
- if (!(req_events & (POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLIN | EPOLLRDNORM)))
return res;
mutex_lock(&dev->lock);
diff --git a/drivers/media/usb/cx231xx/cx231xx-video.c b/drivers/media/usb/cx231xx/cx231xx-video.c
index 271f35208c49..5b321b8ada3a 100644
--- a/drivers/media/usb/cx231xx/cx231xx-video.c
+++ b/drivers/media/usb/cx231xx/cx231xx-video.c
@@ -2018,19 +2018,19 @@ static __poll_t cx231xx_v4l2_poll(struct file *filp, poll_table *wait)
rc = check_dev(dev);
if (rc < 0)
- return POLLERR;
+ return EPOLLERR;
rc = res_get(fh);
if (unlikely(rc < 0))
- return POLLERR;
+ return EPOLLERR;
if (v4l2_event_pending(&fh->fh))
- res |= POLLPRI;
+ res |= EPOLLPRI;
else
poll_wait(filp, &fh->fh.wait, wait);
- if (!(req_events & (POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLIN | EPOLLRDNORM)))
return res;
if ((V4L2_BUF_TYPE_VIDEO_CAPTURE == fh->type) ||
@@ -2040,7 +2040,7 @@ static __poll_t cx231xx_v4l2_poll(struct file *filp, poll_table *wait)
mutex_unlock(&dev->lock);
return res;
}
- return res | POLLERR;
+ return res | EPOLLERR;
}
/*
diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c
index 87e18d0c5766..d29773b8f696 100644
--- a/drivers/media/usb/gspca/gspca.c
+++ b/drivers/media/usb/gspca/gspca.c
@@ -1877,14 +1877,14 @@ static __poll_t dev_poll(struct file *file, poll_table *wait)
gspca_dbg(gspca_dev, D_FRAM, "poll\n");
- if (req_events & POLLPRI)
+ if (req_events & EPOLLPRI)
ret |= v4l2_ctrl_poll(file, wait);
- if (req_events & (POLLIN | POLLRDNORM)) {
+ if (req_events & (EPOLLIN | EPOLLRDNORM)) {
/* if reqbufs is not done, the user would use read() */
if (gspca_dev->memory == GSPCA_MEMORY_NO) {
if (read_alloc(gspca_dev, file) != 0) {
- ret |= POLLERR;
+ ret |= EPOLLERR;
goto out;
}
}
@@ -1893,17 +1893,17 @@ static __poll_t dev_poll(struct file *file, poll_table *wait)
/* check if an image has been received */
if (mutex_lock_interruptible(&gspca_dev->queue_lock) != 0) {
- ret |= POLLERR;
+ ret |= EPOLLERR;
goto out;
}
if (gspca_dev->fr_o != atomic_read(&gspca_dev->fr_i))
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
mutex_unlock(&gspca_dev->queue_lock);
}
out:
if (!gspca_dev->present)
- ret |= POLLHUP;
+ ret |= EPOLLHUP;
return ret;
}
diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c
index 660d4a65401f..77c3d331ff31 100644
--- a/drivers/media/usb/hdpvr/hdpvr-video.c
+++ b/drivers/media/usb/hdpvr/hdpvr-video.c
@@ -528,7 +528,7 @@ static __poll_t hdpvr_poll(struct file *filp, poll_table *wait)
struct hdpvr_device *dev = video_drvdata(filp);
__poll_t mask = v4l2_ctrl_poll(filp, wait);
- if (!(req_events & (POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLIN | EPOLLRDNORM)))
return mask;
mutex_lock(&dev->io_mutex);
@@ -553,7 +553,7 @@ static __poll_t hdpvr_poll(struct file *filp, poll_table *wait)
buf = hdpvr_get_next_buffer(dev);
}
if (buf && buf->status == BUFSTAT_READY)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
index ad6290e1b699..9fdc57c1658f 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
@@ -1181,19 +1181,19 @@ static __poll_t pvr2_v4l2_poll(struct file *file, poll_table *wait)
int ret;
if (fh->fw_mode_flag) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
if (!fh->rhp) {
ret = pvr2_v4l2_iosetup(fh);
- if (ret) return POLLERR;
+ if (ret) return EPOLLERR;
}
poll_wait(file,&fh->wait_data,wait);
if (pvr2_ioread_avail(fh->rhp) >= 0) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
return mask;
diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c
index 17ad978c0172..22389b56ec24 100644
--- a/drivers/media/usb/stkwebcam/stk-webcam.c
+++ b/drivers/media/usb/stkwebcam/stk-webcam.c
@@ -729,10 +729,10 @@ static __poll_t v4l_stk_poll(struct file *fp, poll_table *wait)
poll_wait(fp, &dev->wait_frame, wait);
if (!is_present(dev))
- return POLLERR;
+ return EPOLLERR;
if (!list_empty(&dev->sio_full))
- return res | POLLIN | POLLRDNORM;
+ return res | EPOLLIN | EPOLLRDNORM;
return res;
}
diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
index 96266fa4738c..8314d3fa9241 100644
--- a/drivers/media/usb/tm6000/tm6000-video.c
+++ b/drivers/media/usb/tm6000/tm6000-video.c
@@ -1424,25 +1424,25 @@ __tm6000_poll(struct file *file, struct poll_table_struct *wait)
__poll_t res = 0;
if (v4l2_event_pending(&fh->fh))
- res = POLLPRI;
- else if (req_events & POLLPRI)
+ res = EPOLLPRI;
+ else if (req_events & EPOLLPRI)
poll_wait(file, &fh->fh.wait, wait);
if (V4L2_BUF_TYPE_VIDEO_CAPTURE != fh->type)
- return res | POLLERR;
+ return res | EPOLLERR;
if (!!is_res_streaming(fh->dev, fh))
- return res | POLLERR;
+ return res | EPOLLERR;
if (!is_res_read(fh->dev, fh)) {
/* streaming capture */
if (list_empty(&fh->vb_vidq.stream))
- return res | POLLERR;
+ return res | EPOLLERR;
buf = list_entry(fh->vb_vidq.stream.next, struct tm6000_buffer, vb.stream);
poll_wait(file, &buf->vb.done, wait);
if (buf->vb.state == VIDEOBUF_DONE ||
buf->vb.state == VIDEOBUF_ERROR)
- return res | POLLIN | POLLRDNORM;
- } else if (req_events & (POLLIN | POLLRDNORM)) {
+ return res | EPOLLIN | EPOLLRDNORM;
+ } else if (req_events & (EPOLLIN | EPOLLRDNORM)) {
/* read() capture */
return res | videobuf_poll_stream(file, &fh->vb_vidq, wait);
}
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index b07657149434..ce08b50b8290 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -3462,7 +3462,7 @@ __poll_t v4l2_ctrl_poll(struct file *file, struct poll_table_struct *wait)
struct v4l2_fh *fh = file->private_data;
if (v4l2_event_pending(fh))
- return POLLPRI;
+ return EPOLLPRI;
poll_wait(file, &fh->wait, wait);
return 0;
}
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index cd8127d3f863..0301fe426a43 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -334,7 +334,7 @@ static ssize_t v4l2_write(struct file *filp, const char __user *buf,
static __poll_t v4l2_poll(struct file *filp, struct poll_table_struct *poll)
{
struct video_device *vdev = video_devdata(filp);
- __poll_t res = POLLERR | POLLHUP;
+ __poll_t res = EPOLLERR | EPOLLHUP;
if (!vdev->fops->poll)
return DEFAULT_POLLMASK;
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 186156f8952a..c4f963d96a79 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -514,10 +514,10 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
struct v4l2_fh *fh = file->private_data;
if (v4l2_event_pending(fh))
- rc = POLLPRI;
- else if (req_events & POLLPRI)
+ rc = EPOLLPRI;
+ else if (req_events & EPOLLPRI)
poll_wait(file, &fh->wait, wait);
- if (!(req_events & (POLLOUT | POLLWRNORM | POLLIN | POLLRDNORM)))
+ if (!(req_events & (EPOLLOUT | EPOLLWRNORM | EPOLLIN | EPOLLRDNORM)))
return rc;
}
@@ -531,7 +531,7 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
*/
if ((!src_q->streaming || list_empty(&src_q->queued_list))
&& (!dst_q->streaming || list_empty(&dst_q->queued_list))) {
- rc |= POLLERR;
+ rc |= EPOLLERR;
goto end;
}
@@ -548,7 +548,7 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
*/
if (dst_q->last_buffer_dequeued) {
spin_unlock_irqrestore(&dst_q->done_lock, flags);
- return rc | POLLIN | POLLRDNORM;
+ return rc | EPOLLIN | EPOLLRDNORM;
}
poll_wait(file, &dst_q->done_wq, wait);
@@ -561,7 +561,7 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
done_entry);
if (src_vb && (src_vb->state == VB2_BUF_STATE_DONE
|| src_vb->state == VB2_BUF_STATE_ERROR))
- rc |= POLLOUT | POLLWRNORM;
+ rc |= EPOLLOUT | EPOLLWRNORM;
spin_unlock_irqrestore(&src_q->done_lock, flags);
spin_lock_irqsave(&dst_q->done_lock, flags);
@@ -570,7 +570,7 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
done_entry);
if (dst_vb && (dst_vb->state == VB2_BUF_STATE_DONE
|| dst_vb->state == VB2_BUF_STATE_ERROR))
- rc |= POLLIN | POLLRDNORM;
+ rc |= EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&dst_q->done_lock, flags);
end:
diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c
index 28966fa8c610..c5639817db34 100644
--- a/drivers/media/v4l2-core/v4l2-subdev.c
+++ b/drivers/media/v4l2-core/v4l2-subdev.c
@@ -476,12 +476,12 @@ static __poll_t subdev_poll(struct file *file, poll_table *wait)
struct v4l2_fh *fh = file->private_data;
if (!(sd->flags & V4L2_SUBDEV_FL_HAS_EVENTS))
- return POLLERR;
+ return EPOLLERR;
poll_wait(file, &fh->wait, wait);
if (v4l2_event_pending(fh))
- return POLLPRI;
+ return EPOLLPRI;
return 0;
}
diff --git a/drivers/media/v4l2-core/videobuf-core.c b/drivers/media/v4l2-core/videobuf-core.c
index 9a89d3ae170f..2b3981842b4b 100644
--- a/drivers/media/v4l2-core/videobuf-core.c
+++ b/drivers/media/v4l2-core/videobuf-core.c
@@ -1131,11 +1131,11 @@ __poll_t videobuf_poll_stream(struct file *file,
if (!list_empty(&q->stream))
buf = list_entry(q->stream.next,
struct videobuf_buffer, stream);
- } else if (req_events & (POLLIN | POLLRDNORM)) {
+ } else if (req_events & (EPOLLIN | EPOLLRDNORM)) {
if (!q->reading)
__videobuf_read_start(q);
if (!q->reading) {
- rc = POLLERR;
+ rc = EPOLLERR;
} else if (NULL == q->read_buf) {
q->read_buf = list_entry(q->stream.next,
struct videobuf_buffer,
@@ -1146,7 +1146,7 @@ __poll_t videobuf_poll_stream(struct file *file,
buf = q->read_buf;
}
if (!buf)
- rc = POLLERR;
+ rc = EPOLLERR;
if (0 == rc) {
poll_wait(file, &buf->done, wait);
@@ -1157,10 +1157,10 @@ __poll_t videobuf_poll_stream(struct file *file,
case V4L2_BUF_TYPE_VBI_OUTPUT:
case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
case V4L2_BUF_TYPE_SDR_OUTPUT:
- rc = POLLOUT | POLLWRNORM;
+ rc = EPOLLOUT | EPOLLWRNORM;
break;
default:
- rc = POLLIN | POLLRDNORM;
+ rc = EPOLLIN | EPOLLRDNORM;
break;
}
}
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index fcb3a92ae85f..8ba41073dd89 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -1267,7 +1267,7 @@ static irqreturn_t ab8500_debug_handler(int irq, void *data)
if (irq_abb < num_irqs)
irq_count[irq_abb]++;
/*
- * This makes it possible to use poll for events (POLLPRI | POLLERR)
+ * This makes it possible to use poll for events (EPOLLPRI | EPOLLERR)
* from userspace on sysfs file named <irq-nr>
*/
sprintf(buf, "%d", irq);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 0162516f5e57..bd6ddbdb5cd1 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -378,11 +378,11 @@ __poll_t afu_poll(struct file *file, struct poll_table_struct *poll)
spin_lock_irqsave(&ctx->lock, flags);
if (ctx_event_pending(ctx))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
else if (ctx->status == CLOSED)
/* Only error on closed when there are no futher events pending
*/
- mask |= POLLERR;
+ mask |= EPOLLERR;
spin_unlock_irqrestore(&ctx->lock, flags);
pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask);
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index 35693c0a78e2..e9c9ef52c76a 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -519,9 +519,9 @@ static __poll_t ilo_poll(struct file *fp, poll_table *wait)
poll_wait(fp, &data->ccb_waitq, wait);
if (is_channel_reset(driver_ccb))
- return POLLERR;
+ return EPOLLERR;
else if (ilo_pkt_recv(data->ilo_hw, driver_ccb))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index e49888eab87d..e9bb1cfa6a7a 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -658,7 +658,7 @@ static __poll_t lis3lv02d_misc_poll(struct file *file, poll_table *wait)
poll_wait(file, &lis3->misc_wait, wait);
if (atomic_read(&lis3->count))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 505b710291e6..758dc73602d5 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -551,31 +551,31 @@ static __poll_t mei_poll(struct file *file, poll_table *wait)
bool notify_en;
if (WARN_ON(!cl || !cl->dev))
- return POLLERR;
+ return EPOLLERR;
dev = cl->dev;
mutex_lock(&dev->device_lock);
- notify_en = cl->notify_en && (req_events & POLLPRI);
+ notify_en = cl->notify_en && (req_events & EPOLLPRI);
if (dev->dev_state != MEI_DEV_ENABLED ||
!mei_cl_is_connected(cl)) {
- mask = POLLERR;
+ mask = EPOLLERR;
goto out;
}
if (notify_en) {
poll_wait(file, &cl->ev_wait, wait);
if (cl->notify_ev)
- mask |= POLLPRI;
+ mask |= EPOLLPRI;
}
- if (req_events & (POLLIN | POLLRDNORM)) {
+ if (req_events & (EPOLLIN | EPOLLRDNORM)) {
poll_wait(file, &cl->rx_wait, wait);
if (!list_empty(&cl->rd_completed))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
else
mei_cl_read_start(cl, mei_cl_mtu(cl), file);
}
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
index 85f7d09cc65f..05a63286741c 100644
--- a/drivers/misc/mic/cosm/cosm_scif_server.c
+++ b/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -55,7 +55,7 @@
* message being sent to host SCIF. SCIF_DISCNCT message processing on the
* host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
* up the host COSM thread blocked in scif_poll(..) resulting in
- * scif_poll(..) returning POLLHUP.
+ * scif_poll(..) returning EPOLLHUP.
* 5. On the card, scif_peer_release_dev is next called which results in an
* SCIF_EXIT message being sent to the host and after receiving the
* SCIF_EXIT_ACK from the host the peer device teardown on the card is
@@ -79,7 +79,7 @@
* processing. This results in the COSM endpoint on the card being closed and
* the SCIF host peer device on the card getting unregistered similar to
* steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
- * host returns POLLHUP as a result.
+ * host returns EPOLLHUP as a result.
* 4. On the host, card peer device unregister and SCIF HW remove(..) also
* subsequently complete.
*
@@ -87,11 +87,11 @@
* ----------
* If a reset is issued after the card has crashed, there is no SCIF_DISCNT
* message from the card which would result in scif_poll(..) returning
- * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
+ * EPOLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
* message to itself resulting in the card SCIF peer device being unregistered,
* this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
* scif_invalidate_ep call sequence which sets the endpoint state to
- * DISCONNECTED and results in scif_poll(..) returning POLLHUP.
+ * DISCONNECTED and results in scif_poll(..) returning EPOLLHUP.
*/
#define COSM_SCIF_BACKLOG 16
@@ -190,7 +190,7 @@ static void cosm_send_time(struct cosm_device *cdev)
/*
* Close this cosm_device's endpoint after its peer endpoint on the card has
- * been closed. In all cases except MIC card crash POLLHUP on the host is
+ * been closed. In all cases except MIC card crash EPOLLHUP on the host is
* triggered by the client's endpoint being closed.
*/
static void cosm_scif_close(struct cosm_device *cdev)
@@ -252,7 +252,7 @@ void cosm_scif_work(struct work_struct *work)
while (1) {
pollepd.epd = cdev->epd;
- pollepd.events = POLLIN;
+ pollepd.events = EPOLLIN;
/* Drop the mutex before blocking in scif_poll(..) */
mutex_unlock(&cdev->cosm_mutex);
@@ -266,11 +266,11 @@ void cosm_scif_work(struct work_struct *work)
}
/* There is a message from the card */
- if (pollepd.revents & POLLIN)
+ if (pollepd.revents & EPOLLIN)
cosm_scif_recv(cdev);
/* The peer endpoint is closed or this endpoint disconnected */
- if (pollepd.revents & POLLHUP) {
+ if (pollepd.revents & EPOLLHUP) {
cosm_scif_close(cdev);
break;
}
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
index aa530fcceaa9..beafc0da4027 100644
--- a/drivers/misc/mic/cosm_client/cosm_scif_client.c
+++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -160,7 +160,7 @@ static int cosm_scif_client(void *unused)
while (!kthread_should_stop()) {
pollepd.epd = client_epd;
- pollepd.events = POLLIN;
+ pollepd.events = EPOLLIN;
rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
if (rc < 0) {
@@ -171,7 +171,7 @@ static int cosm_scif_client(void *unused)
continue;
}
- if (pollepd.revents & POLLIN)
+ if (pollepd.revents & EPOLLIN)
cosm_client_recv();
msg.id = COSM_MSG_HEARTBEAT;
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index 8a3e48ec37dd..7b2dddcdd46d 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -1328,7 +1328,7 @@ __scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
if (ep->state == SCIFEP_CONNECTED ||
ep->state == SCIFEP_DISCONNECTED ||
ep->conn_err)
- mask |= POLLOUT;
+ mask |= EPOLLOUT;
goto exit;
}
}
@@ -1338,34 +1338,34 @@ __scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
_scif_poll_wait(f, &ep->conwq, wait, ep);
if (ep->state == SCIFEP_LISTENING) {
if (ep->conreqcnt)
- mask |= POLLIN;
+ mask |= EPOLLIN;
goto exit;
}
}
/* Endpoint is connected or disconnected */
if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
- if (poll_requested_events(wait) & POLLIN)
+ if (poll_requested_events(wait) & EPOLLIN)
_scif_poll_wait(f, &ep->recvwq, wait, ep);
- if (poll_requested_events(wait) & POLLOUT)
+ if (poll_requested_events(wait) & EPOLLOUT)
_scif_poll_wait(f, &ep->sendwq, wait, ep);
if (ep->state == SCIFEP_CONNECTED ||
ep->state == SCIFEP_DISCONNECTED) {
/* Data can be read without blocking */
if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
- mask |= POLLIN;
+ mask |= EPOLLIN;
/* Data can be written without blocking */
if (scif_rb_space(&ep->qp_info.qp->outbound_q))
- mask |= POLLOUT;
- /* Return POLLHUP if endpoint is disconnected */
+ mask |= EPOLLOUT;
+ /* Return EPOLLHUP if endpoint is disconnected */
if (ep->state == SCIFEP_DISCONNECTED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
goto exit;
}
}
- /* Return POLLERR if the endpoint is in none of the above states */
- mask |= POLLERR;
+ /* Return EPOLLERR if the endpoint is in none of the above states */
+ mask |= EPOLLERR;
exit:
spin_unlock(&ep->lock);
return mask;
@@ -1398,10 +1398,10 @@ scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
pt = &table.pt;
while (1) {
for (i = 0; i < nfds; i++) {
- pt->_key = ufds[i].events | POLLERR | POLLHUP;
+ pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
mask = __scif_pollfd(ufds[i].epd->anon,
pt, ufds[i].epd);
- mask &= ufds[i].events | POLLERR | POLLHUP;
+ mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
if (mask) {
count++;
pt->_qproc = NULL;
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 01d1f2ba7bb8..cbc8ebcff5cf 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -1010,7 +1010,7 @@ __unlock_ret:
}
/*
- * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
+ * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
* not when previously enqueued buffers may be available. This means that
* in the card->host (TX) path, when userspace is unblocked by poll it
* must drain all available descriptors or it can stall.
@@ -1022,15 +1022,15 @@ static __poll_t vop_poll(struct file *f, poll_table *wait)
mutex_lock(&vdev->vdev_mutex);
if (vop_vdev_inited(vdev)) {
- mask = POLLERR;
+ mask = EPOLLERR;
goto done;
}
poll_wait(f, &vdev->waitq, wait);
if (vop_vdev_inited(vdev)) {
- mask = POLLERR;
+ mask = EPOLLERR;
} else if (vdev->poll_wake) {
vdev->poll_wake = 0;
- mask = POLLIN | POLLOUT;
+ mask = EPOLLIN | EPOLLOUT;
}
done:
mutex_unlock(&vdev->vdev_mutex);
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index c90c1a578d2f..d9aa407db06a 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -215,9 +215,9 @@ static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
mutex_unlock(&ctx->status_mutex);
if (afu_events_pending(ctx))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
else if (closed)
- mask = POLLERR;
+ mask = EPOLLERR;
return mask;
}
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 8fa68cf308e0..b084245f6238 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -265,9 +265,9 @@ static __poll_t phantom_poll(struct file *file, poll_table *wait)
poll_wait(file, &dev->wait, wait);
if (!(dev->status & PHB_RUNNING))
- mask = POLLERR;
+ mask = EPOLLERR;
else if (atomic_read(&dev->counter))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter));
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 6640e7651533..83e0c95d20a4 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -182,7 +182,7 @@ static __poll_t vmci_host_poll(struct file *filp, poll_table *wait)
if (context->pending_datagrams > 0 ||
vmci_handle_arr_get_size(
context->pending_doorbell_array) > 0) {
- mask = POLLIN;
+ mask = EPOLLIN;
}
spin_unlock(&context->lock);
}
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index e412dfdda7dd..377af43b81b3 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -2648,11 +2648,11 @@ static __poll_t ca8210_test_int_poll(
poll_wait(filp, &priv->test.readq, ptable);
if (!kfifo_is_empty(&priv->test.up_fifo))
- return_flags |= (POLLIN | POLLRDNORM);
+ return_flags |= (EPOLLIN | EPOLLRDNORM);
if (wait_event_interruptible(
priv->test.readq,
!kfifo_is_empty(&priv->test.up_fifo))) {
- return POLLERR;
+ return EPOLLERR;
}
return return_flags;
}
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index ef6b2126b23a..255a5def56e9 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -539,11 +539,11 @@ static __poll_t ppp_poll(struct file *file, poll_table *wait)
if (!pf)
return 0;
poll_wait(file, &pf->rwait, wait);
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
if (skb_peek(&pf->rq))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (pf->dead)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
else if (pf->kind == INTERFACE) {
/* see comment in ppp_read */
struct ppp *ppp = PF_TO_PPP(pf);
@@ -551,7 +551,7 @@ static __poll_t ppp_poll(struct file *file, poll_table *wait)
ppp_recv_lock(ppp);
if (ppp->n_channels == 0 &&
(ppp->flags & SC_LOOP_TRAFFIC) == 0)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
ppp_recv_unlock(ppp);
}
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5aa59f41bf8c..bd89d1c559ce 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -714,7 +714,7 @@ err_put:
}
static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = sizeof(struct sockaddr_pppox);
struct sockaddr_pppox sp;
@@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
- *usockaddr_len = len;
-
- return 0;
+ return len;
}
static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 6dde9a0cfe76..8249d46a7844 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
}
static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = sizeof(struct sockaddr_pppox);
struct sockaddr_pppox sp;
@@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
- *usockaddr_len = len;
-
- return 0;
+ return len;
}
static int pptp_release(struct socket *sock)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 0a5ed004781c..9b6cb780affe 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -377,7 +377,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
}
wake_up:
- wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_poll(sk_sleep(&q->sk), EPOLLIN | EPOLLRDNORM | EPOLLRDBAND);
return RX_HANDLER_CONSUMED;
drop:
@@ -487,7 +487,7 @@ static void tap_sock_write_space(struct sock *sk)
wqueue = sk_sleep(sk);
if (wqueue && waitqueue_active(wqueue))
- wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_poll(wqueue, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
}
static void tap_sock_destruct(struct sock *sk)
@@ -572,7 +572,7 @@ static int tap_release(struct inode *inode, struct file *file)
static __poll_t tap_poll(struct file *file, poll_table *wait)
{
struct tap_queue *q = file->private_data;
- __poll_t mask = POLLERR;
+ __poll_t mask = EPOLLERR;
if (!q)
goto out;
@@ -581,12 +581,12 @@ static __poll_t tap_poll(struct file *file, poll_table *wait)
poll_wait(file, &q->wq.wait, wait);
if (!ptr_ring_empty(&q->ring))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sock_writeable(&q->sk) ||
(!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) &&
sock_writeable(&q->sk)))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
out:
return mask;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 17e496b88f81..81e6cc951e7f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1437,7 +1437,7 @@ static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
if (!tun)
- return POLLERR;
+ return EPOLLERR;
sk = tfile->socket.sk;
@@ -1446,16 +1446,16 @@ static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
poll_wait(file, sk_sleep(sk), wait);
if (!ptr_ring_empty(&tfile->tx_ring))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (tun->dev->flags & IFF_UP &&
(sock_writeable(sk) ||
(!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
sock_writeable(sk))))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (tun->dev->reg_state != NETREG_REGISTERED)
- mask = POLLERR;
+ mask = EPOLLERR;
tun_put(tun);
return mask;
@@ -2310,8 +2310,8 @@ static void tun_sock_write_space(struct sock *sk)
wqueue = sk_sleep(sk);
if (wqueue && waitqueue_active(wqueue))
- wake_up_interruptible_sync_poll(wqueue, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
tfile = container_of(sk, struct tun_file, sk);
kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
index 72c55d1f8903..ac2572943ed0 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
@@ -309,7 +309,7 @@ static __poll_t rt2x00debug_poll_queue_dump(struct file *file,
poll_wait(file, &intf->frame_dump_waitqueue, wait);
if (!skb_queue_empty(&intf->frame_dump_skbqueue))
- return POLLOUT | POLLWRNORM;
+ return EPOLLOUT | EPOLLWRNORM;
return 0;
}
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index a60c0ab7883d..47cd0c037433 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -511,15 +511,15 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait)
poll_wait(filp, &stdev->event_wq, wait);
if (lock_mutex_and_test_alive(stdev))
- return POLLIN | POLLRDHUP | POLLOUT | POLLERR | POLLHUP;
+ return EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLERR | EPOLLHUP;
mutex_unlock(&stdev->mrpc_mutex);
if (try_wait_for_completion(&stuser->comp))
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
- ret |= POLLPRI | POLLRDBAND;
+ ret |= EPOLLPRI | EPOLLRDBAND;
return ret;
}
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index d8599736a41a..6dec6ab13300 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -423,7 +423,7 @@ static int chromeos_laptop_probe(struct platform_device *pdev)
return ret;
}
-static struct chromeos_laptop samsung_series_5_550 = {
+static const struct chromeos_laptop samsung_series_5_550 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
@@ -432,14 +432,14 @@ static struct chromeos_laptop samsung_series_5_550 = {
},
};
-static struct chromeos_laptop samsung_series_5 = {
+static const struct chromeos_laptop samsung_series_5 = {
.i2c_peripherals = {
/* Light Sensor. */
{ .add = setup_tsl2583_als, I2C_ADAPTER_SMBUS },
},
};
-static struct chromeos_laptop chromebook_pixel = {
+static const struct chromeos_laptop chromebook_pixel = {
.i2c_peripherals = {
/* Touch Screen. */
{ .add = setup_atmel_1664s_ts, I2C_ADAPTER_PANEL },
@@ -450,14 +450,14 @@ static struct chromeos_laptop chromebook_pixel = {
},
};
-static struct chromeos_laptop hp_chromebook_14 = {
+static const struct chromeos_laptop hp_chromebook_14 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
},
};
-static struct chromeos_laptop dell_chromebook_11 = {
+static const struct chromeos_laptop dell_chromebook_11 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
@@ -466,28 +466,28 @@ static struct chromeos_laptop dell_chromebook_11 = {
},
};
-static struct chromeos_laptop toshiba_cb35 = {
+static const struct chromeos_laptop toshiba_cb35 = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
},
};
-static struct chromeos_laptop acer_c7_chromebook = {
+static const struct chromeos_laptop acer_c7_chromebook = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
},
};
-static struct chromeos_laptop acer_ac700 = {
+static const struct chromeos_laptop acer_ac700 = {
.i2c_peripherals = {
/* Light Sensor. */
{ .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
},
};
-static struct chromeos_laptop acer_c720 = {
+static const struct chromeos_laptop acer_c720 = {
.i2c_peripherals = {
/* Touchscreen. */
{ .add = setup_atmel_1664s_ts, I2C_ADAPTER_DESIGNWARE_1 },
@@ -500,14 +500,14 @@ static struct chromeos_laptop acer_c720 = {
},
};
-static struct chromeos_laptop hp_pavilion_14_chromebook = {
+static const struct chromeos_laptop hp_pavilion_14_chromebook = {
.i2c_peripherals = {
/* Touchpad. */
{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
},
};
-static struct chromeos_laptop cr48 = {
+static const struct chromeos_laptop cr48 = {
.i2c_peripherals = {
/* Light Sensor. */
{ .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 5473e602f7e0..0e88e18362c1 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -200,7 +200,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file,
if (CIRC_CNT(debug_info->log_buffer.head,
debug_info->log_buffer.tail,
LOG_SIZE))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
mutex_unlock(&debug_info->log_mutex);
return mask;
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 1baf720faf69..af89e82eecd2 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -35,6 +35,9 @@
#define DRV_NAME "cros_ec_lpcs"
#define ACPI_DRV_NAME "GOOG0004"
+/* True if ACPI device is present */
+static bool cros_ec_lpc_acpi_device_found;
+
static int ec_response_timed_out(void)
{
unsigned long one_second = jiffies + HZ;
@@ -54,7 +57,6 @@ static int ec_response_timed_out(void)
static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
struct cros_ec_command *msg)
{
- struct ec_host_request *request;
struct ec_host_response response;
u8 sum;
int ret = 0;
@@ -65,8 +67,6 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
/* Write buffer */
cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PACKET, ret, ec->dout);
- request = (struct ec_host_request *)ec->dout;
-
/* Here we go */
sum = EC_COMMAND_PROTOCOL_3;
cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
@@ -362,6 +362,13 @@ static const struct dmi_system_id cros_ec_lpc_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "Peppy"),
},
},
+ {
+ /* x86-glimmer, the Lenovo Thinkpad Yoga 11e. */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Glimmer"),
+ },
+ },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(dmi, cros_ec_lpc_dmi_table);
@@ -396,9 +403,21 @@ static struct platform_driver cros_ec_lpc_driver = {
.remove = cros_ec_lpc_remove,
};
+static struct platform_device cros_ec_lpc_device = {
+ .name = DRV_NAME
+};
+
+static acpi_status cros_ec_lpc_parse_device(acpi_handle handle, u32 level,
+ void *context, void **retval)
+{
+ *(bool *)context = true;
+ return AE_CTRL_TERMINATE;
+}
+
static int __init cros_ec_lpc_init(void)
{
int ret;
+ acpi_status status;
if (!dmi_check_system(cros_ec_lpc_dmi_table)) {
pr_err(DRV_NAME ": unsupported system.\n");
@@ -415,11 +434,28 @@ static int __init cros_ec_lpc_init(void)
return ret;
}
- return 0;
+ status = acpi_get_devices(ACPI_DRV_NAME, cros_ec_lpc_parse_device,
+ &cros_ec_lpc_acpi_device_found, NULL);
+ if (ACPI_FAILURE(status))
+ pr_warn(DRV_NAME ": Looking for %s failed\n", ACPI_DRV_NAME);
+
+ if (!cros_ec_lpc_acpi_device_found) {
+ /* Register the device, and it'll get hooked up automatically */
+ ret = platform_device_register(&cros_ec_lpc_device);
+ if (ret) {
+ pr_err(DRV_NAME ": can't register device: %d\n", ret);
+ platform_driver_unregister(&cros_ec_lpc_driver);
+ cros_ec_lpc_reg_destroy();
+ }
+ }
+
+ return ret;
}
static void __exit cros_ec_lpc_exit(void)
{
+ if (!cros_ec_lpc_acpi_device_found)
+ platform_device_unregister(&cros_ec_lpc_device);
platform_driver_unregister(&cros_ec_lpc_driver);
cros_ec_lpc_reg_destroy();
}
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 8dfa7fcb1248..e7bbdf947bbc 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -60,12 +60,14 @@ static int send_command(struct cros_ec_device *ec_dev,
struct cros_ec_command *msg)
{
int ret;
+ int (*xfer_fxn)(struct cros_ec_device *ec, struct cros_ec_command *msg);
if (ec_dev->proto_version > 2)
- ret = ec_dev->pkt_xfer(ec_dev, msg);
+ xfer_fxn = ec_dev->pkt_xfer;
else
- ret = ec_dev->cmd_xfer(ec_dev, msg);
+ xfer_fxn = ec_dev->cmd_xfer;
+ ret = (*xfer_fxn)(ec_dev, msg);
if (msg->result == EC_RES_IN_PROGRESS) {
int i;
struct cros_ec_command *status_msg;
@@ -88,7 +90,7 @@ static int send_command(struct cros_ec_device *ec_dev,
for (i = 0; i < EC_COMMAND_RETRIES; i++) {
usleep_range(10000, 11000);
- ret = ec_dev->cmd_xfer(ec_dev, status_msg);
+ ret = (*xfer_fxn)(ec_dev, status_msg);
if (ret < 0)
break;
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
index d6eebe872187..da0a719d32f7 100644
--- a/drivers/platform/chrome/cros_ec_sysfs.c
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -185,7 +185,7 @@ static ssize_t show_ec_version(struct device *dev,
count += scnprintf(buf + count, PAGE_SIZE - count,
"Build info: EC error %d\n", msg->result);
else {
- msg->data[sizeof(msg->data) - 1] = '\0';
+ msg->data[EC_HOST_PARAM_SIZE - 1] = '\0';
count += scnprintf(buf + count, PAGE_SIZE - count,
"Build info: %s\n", msg->data);
}
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 999f1152655a..3e32a4c14d5f 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -549,13 +549,13 @@ static __poll_t goldfish_pipe_poll(struct file *filp, poll_table *wait)
return -ERESTARTSYS;
if (status & PIPE_POLL_IN)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (status & PIPE_POLL_OUT)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (status & PIPE_POLL_HUP)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
- mask |= POLLERR;
+ mask |= EPOLLERR;
return mask;
}
diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index 0dfa1ca0d05b..313cf8ad77bf 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -300,7 +300,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
{
struct mlxreg_core_data *data = item->data;
u32 regval;
- int i, ret;
+ int i, ret = 0;
for (i = 0; i < item->count; i++, data++) {
/* Mask event. */
diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index 27de29961f5e..454e14f02285 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -77,10 +77,13 @@
#define MLXPLAT_CPLD_AGGR_FAN_MASK_DEF 0x40
#define MLXPLAT_CPLD_AGGR_MASK_DEF (MLXPLAT_CPLD_AGGR_PSU_MASK_DEF | \
MLXPLAT_CPLD_AGGR_FAN_MASK_DEF)
+#define MLXPLAT_CPLD_AGGR_MASK_NG_DEF 0x04
+#define MLXPLAT_CPLD_LOW_AGGR_MASK_LOW 0xc0
#define MLXPLAT_CPLD_AGGR_MASK_MSN21XX 0x04
#define MLXPLAT_CPLD_PSU_MASK GENMASK(1, 0)
#define MLXPLAT_CPLD_PWR_MASK GENMASK(1, 0)
#define MLXPLAT_CPLD_FAN_MASK GENMASK(3, 0)
+#define MLXPLAT_CPLD_FAN_NG_MASK GENMASK(5, 0)
/* Start channel numbers */
#define MLXPLAT_CPLD_CH1 2
@@ -89,6 +92,15 @@
/* Number of LPC attached MUX platform devices */
#define MLXPLAT_CPLD_LPC_MUX_DEVS 2
+/* Hotplug devices adapter numbers */
+#define MLXPLAT_CPLD_NR_NONE -1
+#define MLXPLAT_CPLD_PSU_DEFAULT_NR 10
+#define MLXPLAT_CPLD_PSU_MSNXXXX_NR 4
+#define MLXPLAT_CPLD_FAN1_DEFAULT_NR 11
+#define MLXPLAT_CPLD_FAN2_DEFAULT_NR 12
+#define MLXPLAT_CPLD_FAN3_DEFAULT_NR 13
+#define MLXPLAT_CPLD_FAN4_DEFAULT_NR 14
+
/* mlxplat_priv - platform private data
* @pdev_i2c - i2c controller platform device
* @pdev_mux - array of mux platform devices
@@ -159,6 +171,15 @@ static struct i2c_board_info mlxplat_mlxcpld_psu[] = {
},
};
+static struct i2c_board_info mlxplat_mlxcpld_ng_psu[] = {
+ {
+ I2C_BOARD_INFO("24c32", 0x51),
+ },
+ {
+ I2C_BOARD_INFO("24c32", 0x50),
+ },
+};
+
static struct i2c_board_info mlxplat_mlxcpld_pwr[] = {
{
I2C_BOARD_INFO("dps460", 0x59),
@@ -190,14 +211,14 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_psu_items_data[] = {
.reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
.mask = BIT(0),
.hpdev.brdinfo = &mlxplat_mlxcpld_psu[0],
- .hpdev.nr = 10,
+ .hpdev.nr = MLXPLAT_CPLD_PSU_DEFAULT_NR,
},
{
.label = "psu2",
.reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
.mask = BIT(1),
.hpdev.brdinfo = &mlxplat_mlxcpld_psu[1],
- .hpdev.nr = 10,
+ .hpdev.nr = MLXPLAT_CPLD_PSU_DEFAULT_NR,
},
};
@@ -207,14 +228,14 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_pwr_items_data[] = {
.reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
.mask = BIT(0),
.hpdev.brdinfo = &mlxplat_mlxcpld_pwr[0],
- .hpdev.nr = 10,
+ .hpdev.nr = MLXPLAT_CPLD_PSU_DEFAULT_NR,
},
{
.label = "pwr2",
.reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
.mask = BIT(1),
.hpdev.brdinfo = &mlxplat_mlxcpld_pwr[1],
- .hpdev.nr = 10,
+ .hpdev.nr = MLXPLAT_CPLD_PSU_DEFAULT_NR,
},
};
@@ -224,28 +245,28 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_fan_items_data[] = {
.reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
.mask = BIT(0),
.hpdev.brdinfo = &mlxplat_mlxcpld_fan[0],
- .hpdev.nr = 11,
+ .hpdev.nr = MLXPLAT_CPLD_FAN1_DEFAULT_NR,
},
{
.label = "fan2",
.reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
.mask = BIT(1),
.hpdev.brdinfo = &mlxplat_mlxcpld_fan[1],
- .hpdev.nr = 12,
+ .hpdev.nr = MLXPLAT_CPLD_FAN2_DEFAULT_NR,
},
{
.label = "fan3",
.reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
.mask = BIT(2),
.hpdev.brdinfo = &mlxplat_mlxcpld_fan[2],
- .hpdev.nr = 13,
+ .hpdev.nr = MLXPLAT_CPLD_FAN3_DEFAULT_NR,
},
{
.label = "fan4",
.reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
.mask = BIT(3),
.hpdev.brdinfo = &mlxplat_mlxcpld_fan[3],
- .hpdev.nr = 14,
+ .hpdev.nr = MLXPLAT_CPLD_FAN4_DEFAULT_NR,
},
};
@@ -287,14 +308,29 @@ struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_default_data = {
.mask = MLXPLAT_CPLD_AGGR_MASK_DEF,
};
+static struct mlxreg_core_data mlxplat_mlxcpld_msn21xx_pwr_items_data[] = {
+ {
+ .label = "pwr1",
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = BIT(0),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "pwr2",
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = BIT(1),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+};
+
/* Platform hotplug MSN21xx system family data */
static struct mlxreg_core_item mlxplat_mlxcpld_msn21xx_items[] = {
{
- .data = mlxplat_mlxcpld_default_pwr_items_data,
+ .data = mlxplat_mlxcpld_msn21xx_pwr_items_data,
.aggr_mask = MLXPLAT_CPLD_AGGR_PWR_MASK_DEF,
.reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
.mask = MLXPLAT_CPLD_PWR_MASK,
- .count = ARRAY_SIZE(mlxplat_mlxcpld_pwr),
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_msn21xx_pwr_items_data),
.inversed = 0,
.health = false,
},
@@ -306,6 +342,245 @@ struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_msn21xx_data = {
.counter = ARRAY_SIZE(mlxplat_mlxcpld_msn21xx_items),
.cell = MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET,
.mask = MLXPLAT_CPLD_AGGR_MASK_DEF,
+ .cell_low = MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET,
+ .mask_low = MLXPLAT_CPLD_LOW_AGGR_MASK_LOW,
+};
+
+/* Platform hotplug msn274x system family data */
+static struct mlxreg_core_data mlxplat_mlxcpld_msn274x_psu_items_data[] = {
+ {
+ .label = "psu1",
+ .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
+ .mask = BIT(0),
+ .hpdev.brdinfo = &mlxplat_mlxcpld_psu[0],
+ .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
+ },
+ {
+ .label = "psu2",
+ .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
+ .mask = BIT(1),
+ .hpdev.brdinfo = &mlxplat_mlxcpld_psu[1],
+ .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
+ },
+};
+
+static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_pwr_items_data[] = {
+ {
+ .label = "pwr1",
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = BIT(0),
+ .hpdev.brdinfo = &mlxplat_mlxcpld_pwr[0],
+ .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
+ },
+ {
+ .label = "pwr2",
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = BIT(1),
+ .hpdev.brdinfo = &mlxplat_mlxcpld_pwr[1],
+ .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
+ },
+};
+
+static struct mlxreg_core_data mlxplat_mlxcpld_msn274x_fan_items_data[] = {
+ {
+ .label = "fan1",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(0),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan2",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(1),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan3",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(2),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan4",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(3),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+};
+
+static struct mlxreg_core_item mlxplat_mlxcpld_msn274x_items[] = {
+ {
+ .data = mlxplat_mlxcpld_msn274x_psu_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
+ .mask = MLXPLAT_CPLD_PSU_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_msn274x_psu_items_data),
+ .inversed = 1,
+ .health = false,
+ },
+ {
+ .data = mlxplat_mlxcpld_default_ng_pwr_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = MLXPLAT_CPLD_PWR_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_default_ng_pwr_items_data),
+ .inversed = 0,
+ .health = false,
+ },
+ {
+ .data = mlxplat_mlxcpld_msn274x_fan_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = MLXPLAT_CPLD_FAN_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_msn274x_fan_items_data),
+ .inversed = 1,
+ .health = false,
+ },
+};
+
+static
+struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_msn274x_data = {
+ .items = mlxplat_mlxcpld_msn274x_items,
+ .counter = ARRAY_SIZE(mlxplat_mlxcpld_msn274x_items),
+ .cell = MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET,
+ .mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .cell_low = MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET,
+ .mask_low = MLXPLAT_CPLD_LOW_AGGR_MASK_LOW,
+};
+
+/* Platform hotplug MSN201x system family data */
+static struct mlxreg_core_data mlxplat_mlxcpld_msn201x_pwr_items_data[] = {
+ {
+ .label = "pwr1",
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = BIT(0),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "pwr2",
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = BIT(1),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+};
+
+static struct mlxreg_core_item mlxplat_mlxcpld_msn201x_items[] = {
+ {
+ .data = mlxplat_mlxcpld_msn201x_pwr_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_PWR_MASK_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = MLXPLAT_CPLD_PWR_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_msn201x_pwr_items_data),
+ .inversed = 0,
+ .health = false,
+ },
+};
+
+static
+struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_msn201x_data = {
+ .items = mlxplat_mlxcpld_msn21xx_items,
+ .counter = ARRAY_SIZE(mlxplat_mlxcpld_msn201x_items),
+ .cell = MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET,
+ .mask = MLXPLAT_CPLD_AGGR_MASK_DEF,
+ .cell_low = MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET,
+ .mask_low = MLXPLAT_CPLD_LOW_AGGR_MASK_LOW,
+};
+
+/* Platform hotplug next generation system family data */
+static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_psu_items_data[] = {
+ {
+ .label = "psu1",
+ .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
+ .mask = BIT(0),
+ .hpdev.brdinfo = &mlxplat_mlxcpld_ng_psu[0],
+ .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
+ },
+ {
+ .label = "psu2",
+ .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
+ .mask = BIT(1),
+ .hpdev.brdinfo = &mlxplat_mlxcpld_ng_psu[1],
+ .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
+ },
+};
+
+static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_fan_items_data[] = {
+ {
+ .label = "fan1",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(0),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan2",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(1),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan3",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(2),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan4",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(3),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan5",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(4),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+ {
+ .label = "fan6",
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = BIT(5),
+ .hpdev.nr = MLXPLAT_CPLD_NR_NONE,
+ },
+};
+
+static struct mlxreg_core_item mlxplat_mlxcpld_default_ng_items[] = {
+ {
+ .data = mlxplat_mlxcpld_default_ng_psu_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET,
+ .mask = MLXPLAT_CPLD_PSU_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_default_ng_psu_items_data),
+ .inversed = 1,
+ .health = false,
+ },
+ {
+ .data = mlxplat_mlxcpld_default_ng_pwr_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
+ .mask = MLXPLAT_CPLD_PWR_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_default_ng_pwr_items_data),
+ .inversed = 0,
+ .health = false,
+ },
+ {
+ .data = mlxplat_mlxcpld_default_ng_fan_items_data,
+ .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET,
+ .mask = MLXPLAT_CPLD_FAN_NG_MASK,
+ .count = ARRAY_SIZE(mlxplat_mlxcpld_default_ng_fan_items_data),
+ .inversed = 1,
+ .health = false,
+ },
+};
+
+static
+struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_default_ng_data = {
+ .items = mlxplat_mlxcpld_default_ng_items,
+ .counter = ARRAY_SIZE(mlxplat_mlxcpld_default_ng_items),
+ .cell = MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET,
+ .mask = MLXPLAT_CPLD_AGGR_MASK_NG_DEF,
+ .cell_low = MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET,
+ .mask_low = MLXPLAT_CPLD_LOW_AGGR_MASK_LOW,
};
static bool mlxplat_mlxcpld_writeable_reg(struct device *dev, unsigned int reg)
@@ -437,8 +712,57 @@ static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
return 1;
};
+static int __init mlxplat_dmi_msn274x_matched(const struct dmi_system_id *dmi)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+ mlxplat_mux_data[i].values = mlxplat_msn21xx_channels;
+ mlxplat_mux_data[i].n_values =
+ ARRAY_SIZE(mlxplat_msn21xx_channels);
+ }
+ mlxplat_hotplug = &mlxplat_mlxcpld_msn274x_data;
+
+ return 1;
+};
+
+static int __init mlxplat_dmi_msn201x_matched(const struct dmi_system_id *dmi)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+ mlxplat_mux_data[i].values = mlxplat_msn21xx_channels;
+ mlxplat_mux_data[i].n_values =
+ ARRAY_SIZE(mlxplat_msn21xx_channels);
+ }
+ mlxplat_hotplug = &mlxplat_mlxcpld_msn201x_data;
+
+ return 1;
+};
+
+static int __init mlxplat_dmi_qmb7xx_matched(const struct dmi_system_id *dmi)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+ mlxplat_mux_data[i].values = mlxplat_msn21xx_channels;
+ mlxplat_mux_data[i].n_values =
+ ARRAY_SIZE(mlxplat_msn21xx_channels);
+ }
+ mlxplat_hotplug = &mlxplat_mlxcpld_default_ng_data;
+
+ return 1;
+};
+
static const struct dmi_system_id mlxplat_dmi_table[] __initconst = {
{
+ .callback = mlxplat_dmi_msn274x_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSN274"),
+ },
+ },
+ {
.callback = mlxplat_dmi_default_matched,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
@@ -473,6 +797,34 @@ static const struct dmi_system_id mlxplat_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "MSN21"),
},
},
+ {
+ .callback = mlxplat_dmi_msn201x_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSN201"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_qmb7xx_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "QMB7"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_qmb7xx_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "SN37"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_qmb7xx_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "SN34"),
+ },
+ },
{ }
};
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index a4fabf9d75f3..b205b037fd61 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -4128,7 +4128,7 @@ static __poll_t sonypi_misc_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &sonypi_compat.fifo_proc_list, wait);
if (kfifo_len(&sonypi_compat.fifo))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 1d42385b1aa5..8febacb8fc54 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -55,7 +55,7 @@ static __poll_t pps_cdev_poll(struct file *file, poll_table *wait)
poll_wait(file, &pps->queue, wait);
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
static int pps_cdev_fasync(int fd, struct file *file, int on)
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index a593b4cf47bf..767c485af59b 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -286,7 +286,7 @@ __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait)
poll_wait(fp, &ptp->tsev_wq, wait);
- return queue_cnt(&ptp->tsevq) ? POLLIN : 0;
+ return queue_cnt(&ptp->tsevq) ? EPOLLIN : 0;
}
#define EXTTS_BUFSIZE (PTP_BUF_TIMESTAMPS * sizeof(struct ptp_extts_event))
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index 6092b3a5978e..cfb54e01d758 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -2325,7 +2325,7 @@ static __poll_t mport_cdev_poll(struct file *filp, poll_table *wait)
poll_wait(filp, &priv->event_rx_wait, wait);
if (kfifo_len(&priv->event_fifo))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index 8428eba8cb73..92d0c6a7a837 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -967,7 +967,7 @@ static __poll_t qcom_smd_poll(struct rpmsg_endpoint *ept,
poll_wait(filp, &channel->fblockread_event, wait);
if (qcom_smd_get_tx_avail(channel) > 20)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index e622fcda30fa..64b6de9763ee 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -262,12 +262,12 @@ static __poll_t rpmsg_eptdev_poll(struct file *filp, poll_table *wait)
__poll_t mask = 0;
if (!eptdev->ept)
- return POLLERR;
+ return EPOLLERR;
poll_wait(filp, &eptdev->readq, wait);
if (!skb_queue_empty(&eptdev->queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
mask |= rpmsg_poll(eptdev->ept, filp, wait);
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 5a7b30d0773b..efa221e8bc22 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -203,7 +203,7 @@ static __poll_t rtc_dev_poll(struct file *file, poll_table *wait)
data = rtc->irq_data;
- return (data != 0) ? (POLLIN | POLLRDNORM) : 0;
+ return (data != 0) ? (EPOLLIN | EPOLLRDNORM) : 0;
}
static long rtc_dev_ioctl(struct file *file,
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index 0c075d100252..fb2c3599d95c 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -671,7 +671,7 @@ static __poll_t dasd_eer_poll(struct file *filp, poll_table *ptable)
poll_wait(filp, &dasd_eer_read_wait_queue, ptable);
spin_lock_irqsave(&bufferlock, flags);
if (eerb->head != eerb->tail)
- mask = POLLIN | POLLRDNORM ;
+ mask = EPOLLIN | EPOLLRDNORM ;
else
mask = 0;
spin_unlock_irqrestore(&bufferlock, flags);
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index 956f662908a6..7bc616b253f1 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -435,9 +435,9 @@ static __poll_t mon_poll(struct file *filp, struct poll_table_struct *p)
poll_wait(filp, &mon_read_wait_queue, p);
if (unlikely(atomic_read(&monpriv->iucv_severed)))
- return POLLERR;
+ return EPOLLERR;
if (atomic_read(&monpriv->read_ready))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index d06bc5674e5f..6b1891539c84 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -49,7 +49,7 @@ struct read_info_sccb {
u8 _pad_112[116 - 112]; /* 112-115 */
u8 fac116; /* 116 */
u8 fac117; /* 117 */
- u8 _pad_118; /* 118 */
+ u8 fac118; /* 118 */
u8 fac119; /* 119 */
u16 hcpua; /* 120-121 */
u8 _pad_122[124 - 122]; /* 122-123 */
@@ -100,6 +100,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
sclp.has_esca = !!(sccb->fac116 & 0x08);
sclp.has_pfmfi = !!(sccb->fac117 & 0x40);
sclp.has_ibs = !!(sccb->fac117 & 0x20);
+ sclp.has_gisaf = !!(sccb->fac118 & 0x08);
sclp.has_hvs = !!(sccb->fac119 & 0x80);
sclp.has_kss = !!(sccb->fac98 & 0x01);
if (sccb->fac85 & 0x02)
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 6198559abbd8..0ad00dbf912d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -732,7 +732,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
struct sockaddr_in6 addr;
- int rc, len;
+ int rc;
switch(param) {
case ISCSI_PARAM_CONN_PORT:
@@ -745,12 +745,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
}
if (param == ISCSI_PARAM_LOCAL_PORT)
rc = kernel_getsockname(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
else
rc = kernel_getpeername(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
spin_unlock_bh(&conn->session->frwd_lock);
- if (rc)
+ if (rc < 0)
return rc;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
@@ -771,7 +771,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
struct iscsi_tcp_conn *tcp_conn;
struct iscsi_sw_tcp_conn *tcp_sw_conn;
struct sockaddr_in6 addr;
- int rc, len;
+ int rc;
switch (param) {
case ISCSI_HOST_PARAM_IPADDRESS:
@@ -793,9 +793,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
}
rc = kernel_getsockname(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
spin_unlock_bh(&session->frwd_lock);
- if (rc)
+ if (rc < 0)
return rc;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 2791141bd035..a71ee67df084 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -7041,7 +7041,7 @@ static __poll_t megasas_mgmt_poll(struct file *file, poll_table *wait)
poll_wait(file, &megasas_poll_wait, wait);
spin_lock_irqsave(&poll_aen_lock, flags);
if (megasas_poll_wait_aen)
- mask = (POLLIN | POLLRDNORM);
+ mask = (EPOLLIN | EPOLLRDNORM);
else
mask = 0;
megasas_poll_wait_aen = 0;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 9cddc3074cd1..523971aeb4c1 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -546,7 +546,7 @@ _ctl_poll(struct file *filep, poll_table *wait)
list_for_each_entry(ioc, &mpt3sas_ioc_list, list) {
if (ioc->aen_event_read_flag) {
spin_unlock(&gioc_lock);
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
}
spin_unlock(&gioc_lock);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0c434453aab3..c198b96368dd 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1152,27 +1152,27 @@ sg_poll(struct file *filp, poll_table * wait)
sfp = filp->private_data;
if (!sfp)
- return POLLERR;
+ return EPOLLERR;
sdp = sfp->parentdp;
if (!sdp)
- return POLLERR;
+ return EPOLLERR;
poll_wait(filp, &sfp->read_wait, wait);
read_lock_irqsave(&sfp->rq_list_lock, iflags);
list_for_each_entry(srp, &sfp->rq_list, entry) {
/* if any read waiting, flag it */
if ((0 == res) && (1 == srp->done) && (!srp->sg_io_owned))
- res = POLLIN | POLLRDNORM;
+ res = EPOLLIN | EPOLLRDNORM;
++count;
}
read_unlock_irqrestore(&sfp->rq_list_lock, iflags);
if (atomic_read(&sdp->detaching))
- res |= POLLHUP;
+ res |= EPOLLHUP;
else if (!sfp->cmd_q) {
if (0 == count)
- res |= POLLOUT | POLLWRNORM;
+ res |= EPOLLOUT | EPOLLWRNORM;
} else if (count < SG_MAX_QUEUE)
- res |= POLLOUT | POLLWRNORM;
+ res |= EPOLLOUT | EPOLLWRNORM;
SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp,
"sg_poll: res=0x%x\n", (__force u32) res));
return res;
diff --git a/drivers/soc/qcom/qmi_interface.c b/drivers/soc/qcom/qmi_interface.c
index 877611d5c42b..321982277697 100644
--- a/drivers/soc/qcom/qmi_interface.c
+++ b/drivers/soc/qcom/qmi_interface.c
@@ -586,7 +586,6 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
struct sockaddr_qrtr *sq)
{
struct socket *sock;
- int sl = sizeof(*sq);
int ret;
ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
@@ -594,7 +593,7 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
if (ret < 0)
return ERR_PTR(ret);
- ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl);
+ ret = kernel_getsockname(sock, (struct sockaddr *)sq);
if (ret < 0) {
sock_release(sock);
return ERR_PTR(ret);
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index ef733847eebe..c13772a0df58 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -2288,7 +2288,7 @@ static __poll_t comedi_poll(struct file *file, poll_table *wait)
if (s->busy != file || !comedi_is_subdevice_running(s) ||
(s->async->cmd.flags & CMDF_WRITE) ||
comedi_buf_read_n_available(s) > 0)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
s = comedi_file_write_subdevice(file);
@@ -2300,7 +2300,7 @@ static __poll_t comedi_poll(struct file *file, poll_table *wait)
if (s->busy != file || !comedi_is_subdevice_running(s) ||
!(s->async->cmd.flags & CMDF_WRITE) ||
comedi_buf_write_n_available(s) >= bps)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
done:
diff --git a/drivers/staging/comedi/drivers/serial2002.c b/drivers/staging/comedi/drivers/serial2002.c
index ab69eeb2c1f1..b3f3b4a201af 100644
--- a/drivers/staging/comedi/drivers/serial2002.c
+++ b/drivers/staging/comedi/drivers/serial2002.c
@@ -114,8 +114,8 @@ static void serial2002_tty_read_poll_wait(struct file *f, int timeout)
__poll_t mask;
mask = f->f_op->poll(f, &table.pt);
- if (mask & (POLLRDNORM | POLLRDBAND | POLLIN |
- POLLHUP | POLLERR)) {
+ if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
+ EPOLLHUP | EPOLLERR)) {
break;
}
now = ktime_get();
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index 1993b03a6f2d..e8bfe5520bc7 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -37,7 +37,7 @@ module_param_named(loop, create_loop_dev, bool, 0644);
/*
* Threshold below which the tty is woken for writing
* - should be equal to WAKEUP_CHARS in drivers/tty/n_tty.c because
- * even if the writer is woken, n_tty_poll() won't set POLLOUT until
+ * even if the writer is woken, n_tty_poll() won't set EPOLLOUT until
* our fifo is below this level
*/
#define WAKEUP_CHARS 256
diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c
index c51610ce24af..b82e2befe935 100644
--- a/drivers/staging/greybus/tools/loopback_test.c
+++ b/drivers/staging/greybus/tools/loopback_test.c
@@ -663,7 +663,7 @@ static int open_poll_files(struct loopback_test *t)
goto err;
}
read(t->fds[fds_idx].fd, &dummy, 1);
- t->fds[fds_idx].events = POLLERR|POLLPRI;
+ t->fds[fds_idx].events = EPOLLERR|EPOLLPRI;
t->fds[fds_idx].revents = 0;
fds_idx++;
}
@@ -756,7 +756,7 @@ static int wait_for_complete(struct loopback_test *t)
}
for (i = 0; i < t->poll_count; i++) {
- if (t->fds[i].revents & POLLPRI) {
+ if (t->fds[i].revents & EPOLLPRI) {
/* Dummy read to clear the event */
read(t->fds[i].fd, &dummy, 1);
number_of_events++;
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index d21a9d128d3e..5703dd176787 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1577,7 +1577,7 @@ out:
static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct ipx_address *addr;
struct sockaddr_ipx sipx;
@@ -1585,8 +1585,6 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
struct ipx_sock *ipxs = ipx_sk(sk);
int rc;
- *uaddr_len = sizeof(struct sockaddr_ipx);
-
lock_sock(sk);
if (peer) {
rc = -ENOTCONN;
@@ -1620,7 +1618,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
sipx.sipx_zero = 0;
memcpy(uaddr, &sipx, sizeof(sipx));
- rc = 0;
+ rc = sizeof(struct sockaddr_ipx);
out:
release_sock(sk);
return rc;
diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c
index f1d128b2dae9..c13553a9ee11 100644
--- a/drivers/staging/irda/net/af_irda.c
+++ b/drivers/staging/irda/net/af_irda.c
@@ -697,7 +697,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
*
*/
static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_irda saddr;
struct sock *sk = sock->sk;
@@ -720,11 +720,9 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
- /* uaddr_len come to us uninitialised */
- *uaddr_len = sizeof (struct sockaddr_irda);
- memcpy(uaddr, &saddr, *uaddr_len);
+ memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda));
- return 0;
+ return sizeof (struct sockaddr_irda);
}
/*
@@ -1749,16 +1747,16 @@ static __poll_t irda_poll(struct file * file, struct socket *sock,
/* Exceptional events? */
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if (sk->sk_shutdown & RCV_SHUTDOWN) {
pr_debug("%s(), POLLHUP\n", __func__);
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
}
/* Readable? */
if (!skb_queue_empty(&sk->sk_receive_queue)) {
pr_debug("Socket is readable\n");
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
/* Connection-based need to check for termination and startup */
@@ -1766,14 +1764,14 @@ static __poll_t irda_poll(struct file * file, struct socket *sock,
case SOCK_STREAM:
if (sk->sk_state == TCP_CLOSE) {
pr_debug("%s(), POLLHUP\n", __func__);
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
}
if (sk->sk_state == TCP_ESTABLISHED) {
if ((self->tx_flow == FLOW_START) &&
sock_writeable(sk))
{
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
}
}
break;
@@ -1781,12 +1779,12 @@ static __poll_t irda_poll(struct file * file, struct socket *sock,
if ((self->tx_flow == FLOW_START) &&
sock_writeable(sk))
{
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
}
break;
case SOCK_DGRAM:
if (sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
break;
default:
break;
diff --git a/drivers/staging/irda/net/irnet/irnet_ppp.c b/drivers/staging/irda/net/irnet/irnet_ppp.c
index 75bf9e34311d..c90a158af4b7 100644
--- a/drivers/staging/irda/net/irnet/irnet_ppp.c
+++ b/drivers/staging/irda/net/irnet/irnet_ppp.c
@@ -429,10 +429,10 @@ irnet_ctrl_poll(irnet_socket * ap,
DENTER(CTRL_TRACE, "(ap=0x%p)\n", ap);
poll_wait(file, &irnet_events.rwait, wait);
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
/* If there is unread events */
if(ap->event_index != irnet_events.index)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
#ifdef INITIAL_DISCOVERY
if(ap->disco_number != -1)
{
@@ -441,7 +441,7 @@ irnet_ctrl_poll(irnet_socket * ap,
irnet_get_discovery_log(ap);
/* Recheck */
if(ap->disco_number != -1)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
#endif /* INITIAL_DISCOVERY */
@@ -618,7 +618,7 @@ dev_irnet_poll(struct file * file,
DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
file, ap);
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
DABORT(ap == NULL, mask, FS_ERROR, "ap is NULL !!!\n");
/* If we are connected to ppp_generic, let it handle the job */
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index ce93806eefca..1bee667802b0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -448,14 +448,13 @@ int
lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
{
struct sockaddr_in sin;
- int len = sizeof(sin);
int rc;
if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+ rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
- if (rc) {
+ rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
+ if (rc < 0) {
CERROR("Error %d getting sock %s IP/port\n",
rc, remote ? "peer" : "local");
return rc;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
index 6657ebbe068a..4f9f9dca5e6a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
@@ -1265,7 +1265,7 @@ static __poll_t atomisp_poll(struct file *file,
rt_mutex_lock(&isp->mutex);
if (pipe->capq.streaming != 1) {
rt_mutex_unlock(&isp->mutex);
- return POLLERR;
+ return EPOLLERR;
}
rt_mutex_unlock(&isp->mutex);
diff --git a/drivers/staging/media/bcm2048/radio-bcm2048.c b/drivers/staging/media/bcm2048/radio-bcm2048.c
index 4ffff6f8b809..06d1920150da 100644
--- a/drivers/staging/media/bcm2048/radio-bcm2048.c
+++ b/drivers/staging/media/bcm2048/radio-bcm2048.c
@@ -2183,7 +2183,7 @@ static __poll_t bcm2048_fops_poll(struct file *file,
poll_wait(file, &bdev->read_queue, pts);
if (bdev->rds_data_available)
- retval = POLLIN | POLLRDNORM;
+ retval = EPOLLIN | EPOLLRDNORM;
return retval;
}
diff --git a/drivers/staging/most/cdev/cdev.c b/drivers/staging/most/cdev/cdev.c
index c183489c4a1c..4d7fce8731fe 100644
--- a/drivers/staging/most/cdev/cdev.c
+++ b/drivers/staging/most/cdev/cdev.c
@@ -292,10 +292,10 @@ static __poll_t comp_poll(struct file *filp, poll_table *wait)
if (c->cfg->direction == MOST_CH_RX) {
if (!kfifo_is_empty(&c->fifo))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
} else {
if (!kfifo_is_empty(&c->fifo) || ch_has_mbo(c))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
}
diff --git a/drivers/staging/most/video/video.c b/drivers/staging/most/video/video.c
index ef23e8524b1e..9d7e747519d9 100644
--- a/drivers/staging/most/video/video.c
+++ b/drivers/staging/most/video/video.c
@@ -213,7 +213,7 @@ static __poll_t comp_vdev_poll(struct file *filp, poll_table *wait)
if (!data_ready(mdev))
poll_wait(filp, &mdev->wait_data, wait);
if (data_ready(mdev))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c
index 0e74d09e18ea..0a1a7c259ab0 100644
--- a/drivers/staging/speakup/speakup_soft.c
+++ b/drivers/staging/speakup/speakup_soft.c
@@ -325,7 +325,7 @@ static __poll_t softsynth_poll(struct file *fp, struct poll_table_struct *wait)
spin_lock_irqsave(&speakup_info.spinlock, flags);
if (!synth_buffer_empty() || speakup_info.flushing)
- ret = POLLIN | POLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return ret;
}
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 64c5a57b92e4..99501785cdc1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1020,7 +1020,7 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
struct socket *new_sock, *sock = np->np_socket;
struct sockaddr_in sock_in;
struct sockaddr_in6 sock_in6;
- int rc, err;
+ int rc;
rc = kernel_accept(sock, &new_sock, 0);
if (rc < 0)
@@ -1033,8 +1033,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in6, &err, 1);
- if (!rc) {
+ (struct sockaddr *)&sock_in6, 1);
+ if (rc >= 0) {
if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
} else {
@@ -1047,8 +1047,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
}
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in6, &err, 0);
- if (!rc) {
+ (struct sockaddr *)&sock_in6, 0);
+ if (rc >= 0) {
if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
} else {
@@ -1063,13 +1063,13 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
memset(&sock_in, 0, sizeof(struct sockaddr_in));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in, &err, 1);
- if (!rc)
+ (struct sockaddr *)&sock_in, 1);
+ if (rc >= 0)
memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in, &err, 0);
- if (!rc)
+ (struct sockaddr *)&sock_in, 0);
+ if (rc >= 0)
memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
}
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 3b3af7e0ce1c..3b3e1f6632d7 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2477,11 +2477,11 @@ static __poll_t gsmld_poll(struct tty_struct *tty, struct file *file,
poll_wait(file, &tty->read_wait, wait);
poll_wait(file, &tty->write_wait, wait);
if (tty_hung_up_p(file))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (!tty_is_writelocked(tty) && tty_write_room(tty) > 0)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (gsm->dead)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
return mask;
}
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 929434ebee50..dabb391909aa 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -814,14 +814,14 @@ static __poll_t n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp,
/* set bits for operations that won't block */
if (!list_empty(&n_hdlc->rx_buf_list.list))
- mask |= POLLIN | POLLRDNORM; /* readable */
+ mask |= EPOLLIN | EPOLLRDNORM; /* readable */
if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (tty_hung_up_p(filp))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (!tty_is_writelocked(tty) &&
!list_empty(&n_hdlc->tx_free_buf_list.list))
- mask |= POLLOUT | POLLWRNORM; /* writable */
+ mask |= EPOLLOUT | EPOLLWRNORM; /* writable */
}
return mask;
} /* end of n_hdlc_tty_poll() */
diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c
index e81d3db8ad63..dbf1ab36758e 100644
--- a/drivers/tty/n_r3964.c
+++ b/drivers/tty/n_r3964.c
@@ -1223,7 +1223,7 @@ static __poll_t r3964_poll(struct tty_struct *tty, struct file *file,
struct r3964_client_info *pClient;
struct r3964_message *pMsg = NULL;
unsigned long flags;
- __poll_t result = POLLOUT;
+ __poll_t result = EPOLLOUT;
TRACE_L("POLL");
@@ -1234,7 +1234,7 @@ static __poll_t r3964_poll(struct tty_struct *tty, struct file *file,
pMsg = pClient->first_msg;
spin_unlock_irqrestore(&pInfo->lock, flags);
if (pMsg)
- result |= POLLIN | POLLRDNORM;
+ result |= EPOLLIN | EPOLLRDNORM;
} else {
result = -EINVAL;
}
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 478a9b40fd03..5c0e59e8fe46 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1344,7 +1344,7 @@ handle_newline:
put_tty_queue(c, ldata);
smp_store_release(&ldata->canon_head, ldata->read_head);
kill_fasync(&tty->fasync, SIGIO, POLL_IN);
- wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
return 0;
}
}
@@ -1625,7 +1625,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp,
if (read_cnt(ldata)) {
kill_fasync(&tty->fasync, SIGIO, POLL_IN);
- wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
}
}
@@ -2376,22 +2376,22 @@ static __poll_t n_tty_poll(struct tty_struct *tty, struct file *file,
poll_wait(file, &tty->read_wait, wait);
poll_wait(file, &tty->write_wait, wait);
if (input_available_p(tty, 1))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
else {
tty_buffer_flush_work(tty->port);
if (input_available_p(tty, 1))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (tty->packet && tty->link->ctrl_status)
- mask |= POLLPRI | POLLIN | POLLRDNORM;
+ mask |= EPOLLPRI | EPOLLIN | EPOLLRDNORM;
if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (tty_hung_up_p(file))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (tty->ops->write && !tty_is_writelocked(tty) &&
tty_chars_in_buffer(tty) < WAKEUP_CHARS &&
tty_write_room(tty) > 0)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 64338442050e..6c7151edd715 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -344,7 +344,7 @@ static void pty_start(struct tty_struct *tty)
tty->ctrl_status &= ~TIOCPKT_STOP;
tty->ctrl_status |= TIOCPKT_START;
spin_unlock_irqrestore(&tty->ctrl_lock, flags);
- wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
+ wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN);
}
}
@@ -357,7 +357,7 @@ static void pty_stop(struct tty_struct *tty)
tty->ctrl_status &= ~TIOCPKT_START;
tty->ctrl_status |= TIOCPKT_STOP;
spin_unlock_irqrestore(&tty->ctrl_lock, flags);
- wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
+ wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN);
}
}
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 6a89835453d3..eb9133b472f4 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -445,7 +445,7 @@ static ssize_t hung_up_tty_write(struct file *file, const char __user *buf,
/* No kernel lock held - none needed ;) */
static __poll_t hung_up_tty_poll(struct file *filp, poll_table *wait)
{
- return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+ return EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | EPOLLWRNORM;
}
static long hung_up_tty_ioctl(struct file *file, unsigned int cmd,
@@ -533,7 +533,7 @@ void tty_wakeup(struct tty_struct *tty)
tty_ldisc_deref(ld);
}
}
- wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+ wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT);
}
EXPORT_SYMBOL_GPL(tty_wakeup);
@@ -867,7 +867,7 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
static void tty_write_unlock(struct tty_struct *tty)
{
mutex_unlock(&tty->atomic_write_lock);
- wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+ wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT);
}
static int tty_write_lock(struct tty_struct *tty, int ndelay)
@@ -1667,21 +1667,21 @@ int tty_release(struct inode *inode, struct file *filp)
if (tty->count <= 1) {
if (waitqueue_active(&tty->read_wait)) {
- wake_up_poll(&tty->read_wait, POLLIN);
+ wake_up_poll(&tty->read_wait, EPOLLIN);
do_sleep++;
}
if (waitqueue_active(&tty->write_wait)) {
- wake_up_poll(&tty->write_wait, POLLOUT);
+ wake_up_poll(&tty->write_wait, EPOLLOUT);
do_sleep++;
}
}
if (o_tty && o_tty->count <= 1) {
if (waitqueue_active(&o_tty->read_wait)) {
- wake_up_poll(&o_tty->read_wait, POLLIN);
+ wake_up_poll(&o_tty->read_wait, EPOLLIN);
do_sleep++;
}
if (waitqueue_active(&o_tty->write_wait)) {
- wake_up_poll(&o_tty->write_wait, POLLOUT);
+ wake_up_poll(&o_tty->write_wait, EPOLLOUT);
do_sleep++;
}
}
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 4e7946c0484b..050f4d650891 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -735,8 +735,8 @@ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit)
tty_ldisc_deref(ld);
}
- wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
- wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+ wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT);
+ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
/*
* Shutdown the current line discipline, and reset it to
diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 3e64ccd0040f..e4a66e1fd05f 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -563,7 +563,7 @@ static __poll_t
vcs_poll(struct file *file, poll_table *wait)
{
struct vcs_poll_data *poll = vcs_poll_data_get(file);
- __poll_t ret = DEFAULT_POLLMASK|POLLERR|POLLPRI;
+ __poll_t ret = DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
if (poll) {
poll_wait(file, &poll->waitq, wait);
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 85bc1aaea4a4..fd4848392e0d 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -506,7 +506,7 @@ static __poll_t uio_poll(struct file *filep, poll_table *wait)
poll_wait(filep, &idev->wait, wait);
if (listener->event_count != atomic_read(&idev->event))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 9627ea6ec3ae..a0d284ef3f40 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -603,16 +603,16 @@ static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait)
spin_lock_irqsave(&desc->iuspin, flags);
if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
- mask = POLLHUP | POLLERR;
+ mask = EPOLLHUP | EPOLLERR;
spin_unlock_irqrestore(&desc->iuspin, flags);
goto desc_out;
}
if (test_bit(WDM_READ, &desc->flags))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
if (desc->rerr || desc->werr)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if (!test_bit(WDM_IN_USE, &desc->flags))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
spin_unlock_irqrestore(&desc->iuspin, flags);
poll_wait(file, &desc->wait, wait);
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 425247b7f728..d058d7a31e7c 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -479,8 +479,8 @@ static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait)
poll_wait(file, &usblp->rwait, wait);
poll_wait(file, &usblp->wwait, wait);
spin_lock_irqsave(&usblp->lock, flags);
- ret = ((usblp->bidir && usblp->rcomplete) ? POLLIN | POLLRDNORM : 0) |
- ((usblp->no_paper || usblp->wcomplete) ? POLLOUT | POLLWRNORM : 0);
+ ret = ((usblp->bidir && usblp->rcomplete) ? EPOLLIN | EPOLLRDNORM : 0) |
+ ((usblp->no_paper || usblp->wcomplete) ? EPOLLOUT | EPOLLWRNORM : 0);
spin_unlock_irqrestore(&usblp->lock, flags);
return ret;
}
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 7ea67a55be10..bdb1de0c0cef 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1265,13 +1265,13 @@ static __poll_t usbtmc_poll(struct file *file, poll_table *wait)
mutex_lock(&data->io_mutex);
if (data->zombie) {
- mask = POLLHUP | POLLERR;
+ mask = EPOLLHUP | EPOLLERR;
goto no_poll;
}
poll_wait(file, &data->waitq, wait);
- mask = (atomic_read(&data->srq_asserted)) ? POLLIN | POLLRDNORM : 0;
+ mask = (atomic_read(&data->srq_asserted)) ? EPOLLIN | EPOLLRDNORM : 0;
no_poll:
mutex_unlock(&data->io_mutex);
diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index e2cec448779e..3de3c750b5f6 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -632,7 +632,7 @@ static __poll_t usb_device_poll(struct file *file,
event_count = atomic_read(&device_event.count);
if (file->f_version != event_count) {
file->f_version = event_count;
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index bf00166cbee0..d526595bc959 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -2578,11 +2578,11 @@ static __poll_t usbdev_poll(struct file *file,
poll_wait(file, &ps->wait, wait);
if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (!connected(ps))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (list_empty(&ps->list))
- mask |= POLLERR;
+ mask |= EPOLLERR;
return mask;
}
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 67564725e371..8f2cf3baa19c 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -644,7 +644,7 @@ static long ffs_ep0_ioctl(struct file *file, unsigned code, unsigned long value)
static __poll_t ffs_ep0_poll(struct file *file, poll_table *wait)
{
struct ffs_data *ffs = file->private_data;
- __poll_t mask = POLLWRNORM;
+ __poll_t mask = EPOLLWRNORM;
int ret;
poll_wait(file, &ffs->ev.waitq, wait);
@@ -656,19 +656,19 @@ static __poll_t ffs_ep0_poll(struct file *file, poll_table *wait)
switch (ffs->state) {
case FFS_READ_DESCRIPTORS:
case FFS_READ_STRINGS:
- mask |= POLLOUT;
+ mask |= EPOLLOUT;
break;
case FFS_ACTIVE:
switch (ffs->setup_state) {
case FFS_NO_SETUP:
if (ffs->ev.count)
- mask |= POLLIN;
+ mask |= EPOLLIN;
break;
case FFS_SETUP_PENDING:
case FFS_SETUP_CANCELLED:
- mask |= (POLLIN | POLLOUT);
+ mask |= (EPOLLIN | EPOLLOUT);
break;
}
case FFS_CLOSING:
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index a73efb1c47d0..54e859dcb25c 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -422,10 +422,10 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait)
poll_wait(file, &hidg->write_queue, wait);
if (WRITE_COND)
- ret |= POLLOUT | POLLWRNORM;
+ ret |= EPOLLOUT | EPOLLWRNORM;
if (READ_COND)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
return ret;
}
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index 453578c4af69..d359efe06c76 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -698,11 +698,11 @@ printer_poll(struct file *fd, poll_table *wait)
spin_lock_irqsave(&dev->lock, flags);
if (likely(!list_empty(&dev->tx_reqs)))
- status |= POLLOUT | POLLWRNORM;
+ status |= EPOLLOUT | EPOLLWRNORM;
if (likely(dev->current_rx_bytes) ||
likely(!list_empty(&dev->rx_buffers)))
- status |= POLLIN | POLLRDNORM;
+ status |= EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&dev->lock, flags);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 5960e76f4c75..37ca0e669bd8 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1225,16 +1225,16 @@ ep0_poll (struct file *fd, poll_table *wait)
/* report fd mode change before acting on it */
if (dev->setup_abort) {
dev->setup_abort = 0;
- mask = POLLHUP;
+ mask = EPOLLHUP;
goto out;
}
if (dev->state == STATE_DEV_SETUP) {
if (dev->setup_in || dev->setup_can_stall)
- mask = POLLOUT;
+ mask = EPOLLOUT;
} else {
if (dev->ev_next != 0)
- mask = POLLIN;
+ mask = EPOLLIN;
}
out:
spin_unlock_irq(&dev->lock);
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 1fa00b35f4ad..8d33187ce2af 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -683,19 +683,19 @@ static __poll_t iowarrior_poll(struct file *file, poll_table * wait)
__poll_t mask = 0;
if (!dev->present)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
poll_wait(file, &dev->read_wait, wait);
poll_wait(file, &dev->write_wait, wait);
if (!dev->present)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
if (read_index(dev) != -1)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (atomic_read(&dev->write_busy) < MAX_WRITES_IN_FLIGHT)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index 074398c1e410..63b9e85dc0e9 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -417,15 +417,15 @@ static __poll_t ld_usb_poll(struct file *file, poll_table *wait)
dev = file->private_data;
if (!dev->intf)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
poll_wait(file, &dev->read_wait, wait);
poll_wait(file, &dev->write_wait, wait);
if (dev->ring_head != dev->ring_tail)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!dev->interrupt_out_busy)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 941c45028828..bf47bd8bc76f 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -517,17 +517,17 @@ static __poll_t tower_poll (struct file *file, poll_table *wait)
dev = file->private_data;
if (!dev->udev)
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
poll_wait(file, &dev->read_wait, wait);
poll_wait(file, &dev->write_wait, wait);
tower_check_for_read_packet(dev);
if (dev->read_packet_length > 0) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (!dev->interrupt_out_busy) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index cc5b296bff3f..2761fad66b95 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1203,7 +1203,7 @@ mon_bin_poll(struct file *file, struct poll_table_struct *wait)
spin_lock_irqsave(&rp->b_lock, flags);
if (!MON_RING_EMPTY(rp))
- mask |= POLLIN | POLLRDNORM; /* readable */
+ mask |= EPOLLIN | EPOLLRDNORM; /* readable */
spin_unlock_irqrestore(&rp->b_lock, flags);
return mask;
}
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 8cc4b48ff127..085700f1be10 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -48,7 +48,7 @@ static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void
struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
__poll_t flags = key_to_poll(key);
- if (flags & POLLIN) {
+ if (flags & EPOLLIN) {
/* An event has been signaled, call function */
if ((!virqfd->handler ||
virqfd->handler(virqfd->opaque, virqfd->data)) &&
@@ -56,7 +56,7 @@ static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void
schedule_work(&virqfd->inject);
}
- if (flags & POLLHUP) {
+ if (flags & EPOLLHUP) {
unsigned long flags;
spin_lock_irqsave(&virqfd_lock, flags);
@@ -172,14 +172,14 @@ int vfio_virqfd_enable(void *opaque,
* Check if there was an event already pending on the eventfd
* before we registered and trigger it as if we didn't miss it.
*/
- if (events & POLLIN) {
+ if (events & EPOLLIN) {
if ((!handler || handler(opaque, data)) && thread)
schedule_work(&virqfd->inject);
}
/*
* Do not drop the file until the irqfd is fully initialized,
- * otherwise we might race against the POLLHUP.
+ * otherwise we might race against the EPOLLHUP.
*/
fdput(irqfd);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c613d2e3d371..b5fb56b822fd 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -952,8 +952,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
- vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
- vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
f->private_data = n;
@@ -1038,7 +1038,7 @@ static struct socket *get_raw_socket(int fd)
struct sockaddr_ll sa;
char buf[MAX_ADDR_LEN];
} uaddr;
- int uaddr_len = sizeof uaddr, r;
+ int r;
struct socket *sock = sockfd_lookup(fd, &r);
if (!sock)
@@ -1050,9 +1050,8 @@ static struct socket *get_raw_socket(int fd)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
- &uaddr_len, 0);
- if (r)
+ r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+ if (r < 0)
goto err;
if (uaddr.sa.sll_family != AF_PACKET) {
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2db5af8e8652..1b3e8d2d5c8b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -211,7 +211,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
mask = file->f_op->poll(file, &poll->table);
if (mask)
vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
- if (mask & POLLERR) {
+ if (mask & EPOLLERR) {
if (poll->wqh)
remove_wait_queue(poll->wqh, &poll->wait);
ret = -EINVAL;
@@ -440,7 +440,7 @@ void vhost_dev_init(struct vhost_dev *dev,
vhost_vq_reset(dev, vq);
if (vq->handle_kick)
vhost_poll_init(&vq->poll, vq->handle_kick,
- POLLIN, dev);
+ EPOLLIN, dev);
}
}
EXPORT_SYMBOL_GPL(vhost_dev_init);
@@ -630,7 +630,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
vhost_umem_clean(dev->iotlb);
dev->iotlb = NULL;
vhost_clear_msg(dev);
- wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM);
+ wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
WARN_ON(!llist_empty(&dev->work_list));
if (dev->worker) {
kthread_stop(dev->worker);
@@ -1057,7 +1057,7 @@ __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev,
poll_wait(file, &dev->wait, wait);
if (!list_empty(&dev->read_list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
@@ -2356,7 +2356,7 @@ void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head,
list_add_tail(&node->node, head);
spin_unlock(&dev->iotlb_lock);
- wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM);
+ wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
}
EXPORT_SYMBOL_GPL(vhost_enqueue_msg);
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index b0597bef4555..4e05d7f711fe 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -574,7 +574,7 @@ static __poll_t fsl_hv_poll(struct file *filp, struct poll_table_struct *p)
spin_lock_irqsave(&dbq->lock, flags);
poll_wait(filp, &dbq->wait, p);
- mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM);
+ mask = (dbq->head == dbq->tail) ? 0 : (EPOLLIN | EPOLLRDNORM);
spin_unlock_irqrestore(&dbq->lock, flags);
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 72c0416a01cc..8cac07ab60ab 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -623,14 +623,14 @@ static long evtchn_ioctl(struct file *file,
static __poll_t evtchn_poll(struct file *file, poll_table *wait)
{
- __poll_t mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
struct per_user_data *u = file->private_data;
poll_wait(file, &u->evtchn_wait, wait);
if (u->ring_cons != u->ring_prod)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (u->ring_overflow)
- mask = POLLERR;
+ mask = EPOLLERR;
return mask;
}
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
index 9ade533d9e40..262835ace35d 100644
--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -144,7 +144,7 @@ static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait)
poll_wait(file, &xen_mce_chrdev_wait, wait);
if (xen_mcelog.next)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 78804e71f9a6..753d9cb437d0 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -892,7 +892,7 @@ static __poll_t pvcalls_front_poll_passive(struct file *file,
if (req_id != PVCALLS_INVALID_ID &&
READ_ONCE(bedata->rsp[req_id].req_id) == req_id)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
poll_wait(file, &map->passive.inflight_accept_req, wait);
return 0;
@@ -900,7 +900,7 @@ static __poll_t pvcalls_front_poll_passive(struct file *file,
if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
(void *)&map->passive.flags))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
/*
* First check RET, then INFLIGHT. No barriers necessary to
@@ -949,11 +949,11 @@ static __poll_t pvcalls_front_poll_active(struct file *file,
poll_wait(file, &map->active.inflight_conn_req, wait);
if (pvcalls_front_write_todo(map))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (pvcalls_front_read_todo(map))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (in_error != 0 || out_error != 0)
- mask |= POLLERR;
+ mask |= EPOLLERR;
return mask;
}
@@ -968,14 +968,14 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock,
pvcalls_enter();
if (!pvcalls_front_dev) {
pvcalls_exit();
- return POLLNVAL;
+ return EPOLLNVAL;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
- return POLLNVAL;
+ return EPOLLNVAL;
}
if (map->active_socket)
ret = pvcalls_front_poll_active(file, bedata, map, wait);
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 3332978f4fcd..f694ad77379f 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -20,7 +20,7 @@ int pvcalls_front_recvmsg(struct socket *sock,
struct msghdr *msg,
size_t len,
int flags);
-unsigned int pvcalls_front_poll(struct file *file,
+__poll_t pvcalls_front_poll(struct file *file,
struct socket *sock,
poll_table *wait);
int pvcalls_front_release(struct socket *sock);
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index e17ec3fce590..a493e99bed21 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -651,7 +651,7 @@ static __poll_t xenbus_file_poll(struct file *file, poll_table *wait)
poll_wait(file, &u->read_waitq, wait);
if (!list_empty(&u->read_buffers))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 7edbd0679952..3fdee214a5bb 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -289,7 +289,7 @@ found_command:
/*
* poll for culling state
- * - use POLLOUT to indicate culling state
+ * - use EPOLLOUT to indicate culling state
*/
static __poll_t cachefiles_daemon_poll(struct file *file,
struct poll_table_struct *poll)
@@ -301,10 +301,10 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
mask = 0;
if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
- mask |= POLLIN;
+ mask |= EPOLLIN;
if (test_bit(CACHEFILES_CULLING, &cache->flags))
- mask |= POLLOUT;
+ mask |= EPOLLOUT;
return mask;
}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 49d3c6fda89a..c5234c21b539 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -39,7 +39,7 @@
#include <linux/device.h>
#include <linux/pid_namespace.h>
#include <asm/io.h>
-#include <asm/poll.h>
+#include <linux/poll.h>
#include <linux/uaccess.h>
#include <linux/coda.h>
@@ -64,12 +64,12 @@ static struct class *coda_psdev_class;
static __poll_t coda_psdev_poll(struct file *file, poll_table * wait)
{
struct venus_comm *vcp = (struct venus_comm *) file->private_data;
- __poll_t mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
poll_wait(file, &vcp->vc_waitq, wait);
mutex_lock(&vcp->vc_mutex);
if (!list_empty(&vcp->vc_pending))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
mutex_unlock(&vcp->vc_mutex);
return mask;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 6fdbf21be318..1f99678ff5d3 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -18,7 +18,7 @@
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/device.h>
-#include <asm/poll.h>
+#include <linux/poll.h>
#include "internal.h"
@@ -214,7 +214,7 @@ static __poll_t full_proxy_poll(struct file *filp,
const struct file_operations *real_fops;
if (debugfs_file_get(dentry))
- return POLLHUP;
+ return EPOLLHUP;
real_fops = debugfs_real_fops(filp);
r = real_fops->poll(filp, wait);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cff79ea0c01d..5243989a60cc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
struct sockaddr_storage saddr;
- int buflen;
void (*orig_report)(struct sock *) = NULL;
read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
orig_report = listen_sock.sk_error_report;
if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
/* Get the connected socket's peer */
memset(&peeraddr, 0, sizeof(peeraddr));
- if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
- &len, 2)) {
+ len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+ if (len < 0) {
result = -ECONNABORTED;
goto accept_err;
}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index a4c63e9e6385..c7d5a2ea3d03 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -471,7 +471,7 @@ static __poll_t dev_poll(struct file *file, poll_table *wait)
spin_lock(&ops_lock);
if (!list_empty(&send_list))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
spin_unlock(&ops_lock);
return mask;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 662432af8ce8..2a669390cd7f 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -896,7 +896,7 @@ static __poll_t device_poll(struct file *file, poll_table *wait)
spin_lock(&proc->asts_spin);
if (!list_empty(&proc->asts)) {
spin_unlock(&proc->asts_spin);
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
spin_unlock(&proc->asts_spin);
return 0;
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 7423e792a092..2d1158e5f950 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -59,7 +59,7 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
poll_wait(file, &daemon->wait, pt);
mutex_lock(&daemon->mux);
if (!list_empty(&daemon->msg_ctx_out_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
out_unlock_daemon:
daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL;
mutex_unlock(&daemon->mux);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 04fd824142a1..012f5bd46dfa 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -45,7 +45,7 @@ struct eventfd_ctx {
*
* This function is supposed to be called by the kernel in paths that do not
* allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
- * value, and we signal this as overflow condition by returning a POLLERR
+ * value, and we signal this as overflow condition by returning a EPOLLERR
* to poll(2).
*
* Returns the amount by which the counter was incremented. This will be less
@@ -60,7 +60,7 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
- wake_up_locked_poll(&ctx->wqh, POLLIN);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return n;
@@ -96,7 +96,7 @@ static int eventfd_release(struct inode *inode, struct file *file)
{
struct eventfd_ctx *ctx = file->private_data;
- wake_up_poll(&ctx->wqh, POLLHUP);
+ wake_up_poll(&ctx->wqh, EPOLLHUP);
eventfd_ctx_put(ctx);
return 0;
}
@@ -150,11 +150,11 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait)
count = READ_ONCE(ctx->count);
if (count > 0)
- events |= POLLIN;
+ events |= EPOLLIN;
if (count == ULLONG_MAX)
- events |= POLLERR;
+ events |= EPOLLERR;
if (ULLONG_MAX - 1 > count)
- events |= POLLOUT;
+ events |= EPOLLOUT;
return events;
}
@@ -187,7 +187,7 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
eventfd_ctx_do_read(ctx, cnt);
__remove_wait_queue(&ctx->wqh, wait);
if (*cnt != 0 && waitqueue_active(&ctx->wqh))
- wake_up_locked_poll(&ctx->wqh, POLLOUT);
+ wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return *cnt != 0 ? 0 : -EAGAIN;
@@ -231,7 +231,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
if (likely(res > 0)) {
eventfd_ctx_do_read(ctx, &ucnt);
if (waitqueue_active(&ctx->wqh))
- wake_up_locked_poll(&ctx->wqh, POLLOUT);
+ wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
}
spin_unlock_irq(&ctx->wqh.lock);
@@ -281,7 +281,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
if (likely(res > 0)) {
ctx->count += ucnt;
if (waitqueue_active(&ctx->wqh))
- wake_up_locked_poll(&ctx->wqh, POLLIN);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
}
spin_unlock_irq(&ctx->wqh.lock);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 42e35a6977c9..0f3494ed3ed0 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -95,9 +95,9 @@
/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
-#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+#define EPOLLINOUT_BITS (EPOLLIN | EPOLLOUT)
-#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | EPOLLERR | EPOLLHUP | \
EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
/* Maximum number of nesting allowed inside epoll sets */
@@ -260,6 +260,7 @@ struct ep_pqueue {
struct ep_send_events_data {
int maxevents;
struct epoll_event __user *events;
+ int res;
};
/*
@@ -554,7 +555,7 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie;
spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1);
- wake_up_locked_poll(wqueue, POLLIN);
+ wake_up_locked_poll(wqueue, EPOLLIN);
spin_unlock_irqrestore(&wqueue->lock, flags);
return 0;
@@ -574,7 +575,7 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
static void ep_poll_safewake(wait_queue_head_t *wq)
{
- wake_up_poll(wq, POLLIN);
+ wake_up_poll(wq, EPOLLIN);
}
#endif
@@ -660,12 +661,13 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
*
* Returns: The same integer error code returned by the @sproc callback.
*/
-static int ep_scan_ready_list(struct eventpoll *ep,
- int (*sproc)(struct eventpoll *,
+static __poll_t ep_scan_ready_list(struct eventpoll *ep,
+ __poll_t (*sproc)(struct eventpoll *,
struct list_head *, void *),
void *priv, int depth, bool ep_locked)
{
- int error, pwake = 0;
+ __poll_t res;
+ int pwake = 0;
unsigned long flags;
struct epitem *epi, *nepi;
LIST_HEAD(txlist);
@@ -694,7 +696,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
/*
* Now call the callback function.
*/
- error = (*sproc)(ep, &txlist, priv);
+ res = (*sproc)(ep, &txlist, priv);
spin_lock_irqsave(&ep->lock, flags);
/*
@@ -747,7 +749,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
if (pwake)
ep_poll_safewake(&ep->poll_wait);
- return error;
+ return res;
}
static void epi_rcu_free(struct rcu_head *head)
@@ -864,7 +866,7 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
return 0;
}
-static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
+static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
void *priv);
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
poll_table *pt);
@@ -874,7 +876,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
* the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
* is correctly annotated.
*/
-static unsigned int ep_item_poll(const struct epitem *epi, poll_table *pt,
+static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
int depth)
{
struct eventpoll *ep;
@@ -894,7 +896,7 @@ static unsigned int ep_item_poll(const struct epitem *epi, poll_table *pt,
locked) & epi->event.events;
}
-static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
+static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
void *priv)
{
struct epitem *epi, *tmp;
@@ -906,7 +908,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
list_for_each_entry_safe(epi, tmp, head, rdllink) {
if (ep_item_poll(epi, &pt, depth)) {
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
} else {
/*
* Item has been dropped into the ready list by the poll
@@ -1179,12 +1181,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
if ((epi->event.events & EPOLLEXCLUSIVE) &&
!(pollflags & POLLFREE)) {
switch (pollflags & EPOLLINOUT_BITS) {
- case POLLIN:
- if (epi->event.events & POLLIN)
+ case EPOLLIN:
+ if (epi->event.events & EPOLLIN)
ewake = 1;
break;
- case POLLOUT:
- if (epi->event.events & POLLOUT)
+ case EPOLLOUT:
+ if (epi->event.events & EPOLLOUT)
ewake = 1;
break;
case 0:
@@ -1414,7 +1416,8 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
struct file *tfile, int fd, int full_check)
{
- int error, revents, pwake = 0;
+ int error, pwake = 0;
+ __poll_t revents;
unsigned long flags;
long user_watches;
struct epitem *epi;
@@ -1612,12 +1615,11 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
return 0;
}
-static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
+static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
void *priv)
{
struct ep_send_events_data *esed = priv;
- int eventcnt;
- unsigned int revents;
+ __poll_t revents;
struct epitem *epi;
struct epoll_event __user *uevent;
struct wakeup_source *ws;
@@ -1630,8 +1632,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
* Items cannot vanish during the loop because ep_scan_ready_list() is
* holding "mtx" during this call.
*/
- for (eventcnt = 0, uevent = esed->events;
- !list_empty(head) && eventcnt < esed->maxevents;) {
+ for (esed->res = 0, uevent = esed->events;
+ !list_empty(head) && esed->res < esed->maxevents;) {
epi = list_first_entry(head, struct epitem, rdllink);
/*
@@ -1665,9 +1667,11 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
__put_user(epi->event.data, &uevent->data)) {
list_add(&epi->rdllink, head);
ep_pm_stay_awake(epi);
- return eventcnt ? eventcnt : -EFAULT;
+ if (!esed->res)
+ esed->res = -EFAULT;
+ return 0;
}
- eventcnt++;
+ esed->res++;
uevent++;
if (epi->event.events & EPOLLONESHOT)
epi->event.events &= EP_PRIVATE_BITS;
@@ -1689,7 +1693,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
}
}
- return eventcnt;
+ return 0;
}
static int ep_send_events(struct eventpoll *ep,
@@ -1700,7 +1704,8 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
+ ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
+ return esed.res;
}
static inline struct timespec64 ep_set_mstimeout(long ms)
@@ -2100,7 +2105,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
- epds.events |= POLLERR | POLLHUP;
+ epds.events |= EPOLLERR | EPOLLHUP;
error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
@@ -2116,7 +2121,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_MOD:
if (epi) {
if (!(epi->event.events & EPOLLEXCLUSIVE)) {
- epds.events |= POLLERR | POLLHUP;
+ epds.events |= EPOLLERR | EPOLLHUP;
error = ep_modify(ep, epi, &epds);
}
} else
diff --git a/fs/fcntl.c b/fs/fcntl.c
index e95fa0a352ea..1e97f1fda90c 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,7 +26,7 @@
#include <linux/shmem_fs.h>
#include <linux/compat.h>
-#include <asm/poll.h>
+#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>
@@ -691,12 +691,12 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
/* Table to convert sigio signal codes into poll band bitmaps */
static const __poll_t band_table[NSIGPOLL] = {
- POLLIN | POLLRDNORM, /* POLL_IN */
- POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */
- POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */
- POLLERR, /* POLL_ERR */
- POLLPRI | POLLRDBAND, /* POLL_PRI */
- POLLHUP | POLLERR /* POLL_HUP */
+ EPOLLIN | EPOLLRDNORM, /* POLL_IN */
+ EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND, /* POLL_OUT */
+ EPOLLIN | EPOLLRDNORM | EPOLLMSG, /* POLL_MSG */
+ EPOLLERR, /* POLL_ERR */
+ EPOLLPRI | EPOLLRDBAND, /* POLL_PRI */
+ EPOLLHUP | EPOLLERR /* POLL_HUP */
};
static inline int sigio_perm(struct task_struct *p,
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index aa089a6925d0..5d06384c2cae 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2006,21 +2006,21 @@ out:
static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
{
- __poll_t mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
struct fuse_iqueue *fiq;
struct fuse_dev *fud = fuse_get_dev(file);
if (!fud)
- return POLLERR;
+ return EPOLLERR;
fiq = &fud->fc->iq;
poll_wait(file, &fiq->waitq, wait);
spin_lock(&fiq->waitq.lock);
if (!fiq->connected)
- mask = POLLERR;
+ mask = EPOLLERR;
else if (request_pending(fiq))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
spin_unlock(&fiq->waitq.lock);
return mask;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e85e974dd211..a201fb0ac64f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2791,7 +2791,7 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
fc->no_poll = 1;
return DEFAULT_POLLMASK;
}
- return POLLERR;
+ return EPOLLERR;
}
EXPORT_SYMBOL_GPL(fuse_file_poll);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index a03ce3422578..fd5ce883072e 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -823,7 +823,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
* the content and then you use 'poll' or 'select' to wait for
* the content to change. When the content changes (assuming the
* manager for the kobject supports notification), poll will
- * return POLLERR|POLLPRI, and select will return the fd whether
+ * return EPOLLERR|EPOLLPRI, and select will return the fd whether
* it is waiting for read, write, or exceptions.
* Once poll/select indicates that the value has changed, you
* need to close and re-open the file, or seek to 0 and read again.
@@ -851,7 +851,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
return DEFAULT_POLLMASK;
trigger:
- return DEFAULT_POLLMASK|POLLERR|POLLPRI;
+ return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
}
static void kernfs_notify_workfn(struct work_struct *work)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ef08d64c84b8..c07eb3d655ea 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -247,7 +247,7 @@ static __poll_t fanotify_poll(struct file *file, poll_table *wait)
poll_wait(file, &group->notification_waitq, wait);
spin_lock(&group->notification_lock);
if (!fsnotify_notify_queue_is_empty(group))
- ret = POLLIN | POLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
spin_unlock(&group->notification_lock);
return ret;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 5c29bf16814f..2c908b31d6c9 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -115,7 +115,7 @@ static __poll_t inotify_poll(struct file *file, poll_table *wait)
poll_wait(file, &group->notification_waitq, wait);
spin_lock(&group->notification_lock);
if (!fsnotify_notify_queue_is_empty(group))
- ret = POLLIN | POLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
spin_unlock(&group->notification_lock);
return ret;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eac5140aac47..e5076185cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
static int o2net_accept_one(struct socket *sock, int *more)
{
- int ret, slen;
+ int ret;
struct sockaddr_in sin;
struct socket *new_sock = NULL;
struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
- slen = sizeof(sin);
- ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
- &slen, 1);
+ ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
goto out;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 385fcefa8bc5..602c71f32740 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -71,7 +71,7 @@ struct workqueue_struct *user_dlm_worker;
* Over time, dlmfs has added some features that were not part of the
* initial ABI. Unfortunately, some of these features are not detectable
* via standard usage. For example, Linux's default poll always returns
- * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs
+ * EPOLLIN, so there is no way for a caller of poll(2) to know when dlmfs
* added poll support. Instead, we provide this list of new capabilities.
*
* Capabilities is a read-only attribute. We do it as a module parameter
@@ -83,7 +83,7 @@ struct workqueue_struct *user_dlm_worker;
* interaction.
*
* Capabilities:
- * - bast : POLLIN against the file descriptor of a held lock
+ * - bast : EPOLLIN against the file descriptor of a held lock
* signifies a bast fired on the lock.
*/
#define DLMFS_CAPABILITIES "bast stackglue"
@@ -230,7 +230,7 @@ static __poll_t dlmfs_file_poll(struct file *file, poll_table *wait)
spin_lock(&ip->ip_lockres.l_lock);
if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED)
- event = POLLIN | POLLRDNORM;
+ event = EPOLLIN | EPOLLRDNORM;
spin_unlock(&ip->ip_lockres.l_lock);
return event;
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index f073cd9e6687..b03057afac2a 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -823,7 +823,7 @@ static __poll_t orangefs_devreq_poll(struct file *file,
poll_wait(file, &orangefs_request_list_waitq, poll_table);
if (!list_empty(&orangefs_request_list))
- poll_revent_mask |= POLLIN;
+ poll_revent_mask |= EPOLLIN;
return poll_revent_mask;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 0913aed7fd0d..7b1954caf388 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -327,7 +327,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
}
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
+ wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
pipe_wait(pipe);
@@ -336,7 +336,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
/* Signal writers asynchronously that there is more room. */
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
+ wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
if (ret > 0)
@@ -463,7 +463,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
break;
}
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
+ wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
do_wakeup = 0;
}
@@ -474,7 +474,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
out:
__pipe_unlock(pipe);
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
+ wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
@@ -523,19 +523,19 @@ pipe_poll(struct file *filp, poll_table *wait)
nrbufs = pipe->nrbufs;
mask = 0;
if (filp->f_mode & FMODE_READ) {
- mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
+ mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
if (!pipe->writers && filp->f_version != pipe->w_counter)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
}
if (filp->f_mode & FMODE_WRITE) {
- mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
+ mask |= (nrbufs < pipe->buffers) ? EPOLLOUT | EPOLLWRNORM : 0;
/*
- * Most Unices do not set POLLERR for FIFOs but on Linux they
+ * Most Unices do not set EPOLLERR for FIFOs but on Linux they
* behave exactly like pipes for poll().
*/
if (!pipe->readers)
- mask |= POLLERR;
+ mask |= EPOLLERR;
}
return mask;
@@ -568,7 +568,7 @@ pipe_release(struct inode *inode, struct file *file)
pipe->writers--;
if (pipe->readers || pipe->writers) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
+ wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
@@ -936,7 +936,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
if (!is_pipe && !pipe->writers) {
if ((filp->f_flags & O_NONBLOCK)) {
- /* suppress POLLHUP until we have
+ /* suppress EPOLLHUP until we have
* seen a writer */
filp->f_version = pipe->w_counter;
} else {
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index f0bfb45c3f9f..4f4a2abb225e 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -44,7 +44,7 @@ static __poll_t kmsg_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &log_wait, wait);
if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 63325377621a..c41ab261397d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -640,7 +640,7 @@ static __poll_t proc_sys_poll(struct file *filp, poll_table *wait)
/* sysctl was unregistered */
if (IS_ERR(head))
- return POLLERR | POLLHUP;
+ return EPOLLERR | EPOLLHUP;
if (!table->proc_handler)
goto out;
@@ -653,7 +653,7 @@ static __poll_t proc_sys_poll(struct file *filp, poll_table *wait)
if (event != atomic_read(&table->poll->event)) {
filp->private_data = proc_sys_poll_event(table->poll);
- ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
+ ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI;
}
out:
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index c8528d587e09..e16fb8f2049e 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -23,7 +23,7 @@ static __poll_t mounts_poll(struct file *file, poll_table *wait)
struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private;
struct mnt_namespace *ns = p->ns;
- __poll_t res = POLLIN | POLLRDNORM;
+ __poll_t res = EPOLLIN | EPOLLRDNORM;
int event;
poll_wait(file, &p->ns->poll, wait);
@@ -31,7 +31,7 @@ static __poll_t mounts_poll(struct file *file, poll_table *wait)
event = READ_ONCE(ns->event);
if (m->poll_event != event) {
m->poll_event = event;
- res |= POLLERR | POLLPRI;
+ res |= EPOLLERR | EPOLLPRI;
}
return res;
diff --git a/fs/select.c b/fs/select.c
index ec14171dd78a..b6c36254028a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -432,9 +432,9 @@ get_max:
return max;
}
-#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
-#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
-#define POLLEX_SET (POLLPRI)
+#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
+#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
+#define POLLEX_SET (EPOLLPRI)
static inline void wait_key_set(poll_table *wait, unsigned long in,
unsigned long out, unsigned long bit,
@@ -814,11 +814,11 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
fd = pollfd->fd;
if (fd >= 0) {
struct fd f = fdget(fd);
- mask = POLLNVAL;
+ mask = EPOLLNVAL;
if (f.file) {
/* userland u16 ->events contains POLL... bitmap */
__poll_t filter = demangle_poll(pollfd->events) |
- POLLERR | POLLHUP;
+ EPOLLERR | EPOLLHUP;
mask = DEFAULT_POLLMASK;
if (f.file->f_op->poll) {
pwait->_key = filter;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 31e923bec99a..9990957264e3 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -45,7 +45,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
return;
/* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
- wake_up_poll(wqh, POLLHUP | POLLFREE);
+ wake_up_poll(wqh, EPOLLHUP | POLLFREE);
}
struct signalfd_ctx {
@@ -69,7 +69,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait)
if (next_signal(&current->pending, &ctx->sigmask) ||
next_signal(&current->signal->shared_pending,
&ctx->sigmask))
- events |= POLLIN;
+ events |= EPOLLIN;
spin_unlock_irq(&current->sighand->siglock);
return events;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0510717f3a53..cdad49da3ff7 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -237,7 +237,7 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait)
spin_lock_irqsave(&ctx->wqh.lock, flags);
if (ctx->ticks)
- events |= POLLIN;
+ events |= EPOLLIN;
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return events;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 87a13a7c8270..cec550c8468f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -483,7 +483,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
if (likely(must_wait && !READ_ONCE(ctx->released) &&
(return_to_userland ? !signal_pending(current) :
!fatal_signal_pending(current)))) {
- wake_up_poll(&ctx->fd_wqh, POLLIN);
+ wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
ret |= VM_FAULT_MAJOR;
@@ -614,7 +614,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
spin_unlock(&ctx->event_wqh.lock);
- wake_up_poll(&ctx->fd_wqh, POLLIN);
+ wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
spin_lock(&ctx->event_wqh.lock);
@@ -904,7 +904,7 @@ wakeup:
/* Flush pending events that may still wait on event_wqh */
wake_up_all(&ctx->event_wqh);
- wake_up_poll(&ctx->fd_wqh, POLLHUP);
+ wake_up_poll(&ctx->fd_wqh, EPOLLHUP);
userfaultfd_ctx_put(ctx);
return 0;
}
@@ -949,14 +949,14 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
switch (ctx->state) {
case UFFD_STATE_WAIT_API:
- return POLLERR;
+ return EPOLLERR;
case UFFD_STATE_RUNNING:
/*
* poll() never guarantees that read won't block.
* userfaults can be waken before they're read().
*/
if (unlikely(!(file->f_flags & O_NONBLOCK)))
- return POLLERR;
+ return EPOLLERR;
/*
* lockless access to see if there are pending faults
* __pollwait last action is the add_wait_queue but
@@ -970,14 +970,14 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
ret = 0;
smp_mb();
if (waitqueue_active(&ctx->fault_pending_wqh))
- ret = POLLIN;
+ ret = EPOLLIN;
else if (waitqueue_active(&ctx->event_wqh))
- ret = POLLIN;
+ ret = EPOLLIN;
return ret;
default:
WARN_ON_ONCE(1);
- return POLLERR;
+ return EPOLLERR;
}
}
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 9da6ce22803f..6502feb9524b 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -90,6 +90,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
void kvm_timer_init_vhe(void);
+bool kvm_arch_timer_get_input_level(int vintid);
+
#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer)
#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 8c896540a72c..cdbd142ca7f2 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -130,6 +130,17 @@ struct vgic_irq {
u8 priority;
enum vgic_irq_config config; /* Level or edge */
+ /*
+ * Callback function pointer to in-kernel devices that can tell us the
+ * state of the input level of mapped level-triggered IRQ faster than
+ * peaking into the physical GIC.
+ *
+ * Always called in non-preemptible section and the functions can use
+ * kvm_arm_get_running_vcpu() to get the vcpu pointer for private
+ * IRQs.
+ */
+ bool (*get_input_level)(int vintid);
+
void *owner; /* Opaque pointer to reserve an interrupt
for in-kernel devices. */
};
@@ -331,7 +342,7 @@ void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level, void *owner);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid);
+ u32 vintid, bool (*get_input_level)(int vindid));
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6bdd4b9f6611..ac0062b74aed 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -533,7 +533,7 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
-int __must_check vcpu_load(struct kvm_vcpu *vcpu);
+void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
@@ -1260,4 +1260,16 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
+#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg);
+#else
+static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl,
+ unsigned long arg)
+{
+ return -ENOIOCTLCMD;
+}
+#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
+
#endif
diff --git a/include/linux/net.h b/include/linux/net.h
index 91216b16feb7..000d1aada74f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
- int *sockaddr_len, int peer);
+ int peer);
__poll_t (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
@@ -294,10 +294,8 @@ int kernel_listen(struct socket *sock, int backlog);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
int *optlen);
int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 04781a753326..f45ebd017eaa 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -11,6 +11,7 @@
#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <uapi/linux/poll.h>
+#include <uapi/linux/eventpoll.h>
extern struct ctl_table epoll_table[]; /* for sysctl */
/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
@@ -22,7 +23,7 @@ extern struct ctl_table epoll_table[]; /* for sysctl */
#define WQUEUES_STACK_ALLOC (MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC)
#define N_INLINE_POLL_ENTRIES (WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry))
-#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
+#define DEFAULT_POLLMASK (EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM)
struct poll_table_struct;
@@ -107,4 +108,28 @@ extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
extern int poll_select_set_timeout(struct timespec64 *to, time64_t sec,
long nsec);
+#define __MAP(v, from, to) \
+ (from < to ? (v & from) * (to/from) : (v & from) / (from/to))
+
+static inline __u16 mangle_poll(__poll_t val)
+{
+ __u16 v = (__force __u16)val;
+#define M(X) __MAP(v, (__force __u16)EPOLL##X, POLL##X)
+ return M(IN) | M(OUT) | M(PRI) | M(ERR) | M(NVAL) |
+ M(RDNORM) | M(RDBAND) | M(WRNORM) | M(WRBAND) |
+ M(HUP) | M(RDHUP) | M(MSG);
+#undef M
+}
+
+static inline __poll_t demangle_poll(u16 val)
+{
+#define M(X) (__force __poll_t)__MAP(val, POLL##X, (__force __u16)EPOLL##X)
+ return M(IN) | M(OUT) | M(PRI) | M(ERR) | M(NVAL) |
+ M(RDNORM) | M(RDBAND) | M(WRNORM) | M(WRBAND) |
+ M(HUP) | M(RDHUP) | M(MSG);
+#undef M
+}
+#undef __MAP
+
+
#endif /* _LINUX_POLL_H */
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
new file mode 100644
index 000000000000..93addfa34061
--- /dev/null
+++ b/include/linux/psp-sev.h
@@ -0,0 +1,606 @@
+/*
+ * AMD Secure Encrypted Virtualization (SEV) driver interface
+ *
+ * Copyright (C) 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <[email protected]>
+ *
+ * SEV spec 0.14 is available at:
+ * http://support.amd.com/TechDocs/55766_SEV-KM API_Specification.pdf
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PSP_SEV_H__
+#define __PSP_SEV_H__
+
+#include <uapi/linux/psp-sev.h>
+
+#ifdef CONFIG_X86
+#include <linux/mem_encrypt.h>
+
+#define __psp_pa(x) __sme_pa(x)
+#else
+#define __psp_pa(x) __pa(x)
+#endif
+
+#define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */
+
+/**
+ * SEV platform state
+ */
+enum sev_state {
+ SEV_STATE_UNINIT = 0x0,
+ SEV_STATE_INIT = 0x1,
+ SEV_STATE_WORKING = 0x2,
+
+ SEV_STATE_MAX
+};
+
+/**
+ * SEV platform and guest management commands
+ */
+enum sev_cmd {
+ /* platform commands */
+ SEV_CMD_INIT = 0x001,
+ SEV_CMD_SHUTDOWN = 0x002,
+ SEV_CMD_FACTORY_RESET = 0x003,
+ SEV_CMD_PLATFORM_STATUS = 0x004,
+ SEV_CMD_PEK_GEN = 0x005,
+ SEV_CMD_PEK_CSR = 0x006,
+ SEV_CMD_PEK_CERT_IMPORT = 0x007,
+ SEV_CMD_PDH_CERT_EXPORT = 0x008,
+ SEV_CMD_PDH_GEN = 0x009,
+ SEV_CMD_DF_FLUSH = 0x00A,
+
+ /* Guest commands */
+ SEV_CMD_DECOMMISSION = 0x020,
+ SEV_CMD_ACTIVATE = 0x021,
+ SEV_CMD_DEACTIVATE = 0x022,
+ SEV_CMD_GUEST_STATUS = 0x023,
+
+ /* Guest launch commands */
+ SEV_CMD_LAUNCH_START = 0x030,
+ SEV_CMD_LAUNCH_UPDATE_DATA = 0x031,
+ SEV_CMD_LAUNCH_UPDATE_VMSA = 0x032,
+ SEV_CMD_LAUNCH_MEASURE = 0x033,
+ SEV_CMD_LAUNCH_UPDATE_SECRET = 0x034,
+ SEV_CMD_LAUNCH_FINISH = 0x035,
+
+ /* Guest migration commands (outgoing) */
+ SEV_CMD_SEND_START = 0x040,
+ SEV_CMD_SEND_UPDATE_DATA = 0x041,
+ SEV_CMD_SEND_UPDATE_VMSA = 0x042,
+ SEV_CMD_SEND_FINISH = 0x043,
+
+ /* Guest migration commands (incoming) */
+ SEV_CMD_RECEIVE_START = 0x050,
+ SEV_CMD_RECEIVE_UPDATE_DATA = 0x051,
+ SEV_CMD_RECEIVE_UPDATE_VMSA = 0x052,
+ SEV_CMD_RECEIVE_FINISH = 0x053,
+
+ /* Guest debug commands */
+ SEV_CMD_DBG_DECRYPT = 0x060,
+ SEV_CMD_DBG_ENCRYPT = 0x061,
+
+ SEV_CMD_MAX,
+};
+
+/**
+ * struct sev_data_init - INIT command parameters
+ *
+ * @flags: processing flags
+ * @tmr_address: system physical address used for SEV-ES
+ * @tmr_len: len of tmr_address
+ */
+struct sev_data_init {
+ u32 flags; /* In */
+ u32 reserved; /* In */
+ u64 tmr_address; /* In */
+ u32 tmr_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_pek_csr - PEK_CSR command parameters
+ *
+ * @address: PEK certificate chain
+ * @len: len of certificate
+ */
+struct sev_data_pek_csr {
+ u64 address; /* In */
+ u32 len; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_data_cert_import - PEK_CERT_IMPORT command parameters
+ *
+ * @pek_address: PEK certificate chain
+ * @pek_len: len of PEK certificate
+ * @oca_address: OCA certificate chain
+ * @oca_len: len of OCA certificate
+ */
+struct sev_data_pek_cert_import {
+ u64 pek_cert_address; /* In */
+ u32 pek_cert_len; /* In */
+ u32 reserved; /* In */
+ u64 oca_cert_address; /* In */
+ u32 oca_cert_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_pdh_cert_export - PDH_CERT_EXPORT command parameters
+ *
+ * @pdh_address: PDH certificate address
+ * @pdh_len: len of PDH certificate
+ * @cert_chain_address: PDH certificate chain
+ * @cert_chain_len: len of PDH certificate chain
+ */
+struct sev_data_pdh_cert_export {
+ u64 pdh_cert_address; /* In */
+ u32 pdh_cert_len; /* In/Out */
+ u32 reserved; /* In */
+ u64 cert_chain_address; /* In */
+ u32 cert_chain_len; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_data_decommission - DECOMMISSION command parameters
+ *
+ * @handle: handle of the VM to decommission
+ */
+struct sev_data_decommission {
+ u32 handle; /* In */
+} __packed;
+
+/**
+ * struct sev_data_activate - ACTIVATE command parameters
+ *
+ * @handle: handle of the VM to activate
+ * @asid: asid assigned to the VM
+ */
+struct sev_data_activate {
+ u32 handle; /* In */
+ u32 asid; /* In */
+} __packed;
+
+/**
+ * struct sev_data_deactivate - DEACTIVATE command parameters
+ *
+ * @handle: handle of the VM to deactivate
+ */
+struct sev_data_deactivate {
+ u32 handle; /* In */
+} __packed;
+
+/**
+ * struct sev_data_guest_status - SEV GUEST_STATUS command parameters
+ *
+ * @handle: handle of the VM to retrieve status
+ * @policy: policy information for the VM
+ * @asid: current ASID of the VM
+ * @state: current state of the VM
+ */
+struct sev_data_guest_status {
+ u32 handle; /* In */
+ u32 policy; /* Out */
+ u32 asid; /* Out */
+ u8 state; /* Out */
+} __packed;
+
+/**
+ * struct sev_data_launch_start - LAUNCH_START command parameters
+ *
+ * @handle: handle assigned to the VM
+ * @policy: guest launch policy
+ * @dh_cert_address: physical address of DH certificate blob
+ * @dh_cert_len: len of DH certificate blob
+ * @session_address: physical address of session parameters
+ * @session_len: len of session parameters
+ */
+struct sev_data_launch_start {
+ u32 handle; /* In/Out */
+ u32 policy; /* In */
+ u64 dh_cert_address; /* In */
+ u32 dh_cert_len; /* In */
+ u32 reserved; /* In */
+ u64 session_address; /* In */
+ u32 session_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_launch_update_data - LAUNCH_UPDATE_DATA command parameter
+ *
+ * @handle: handle of the VM to update
+ * @len: len of memory to be encrypted
+ * @address: physical address of memory region to encrypt
+ */
+struct sev_data_launch_update_data {
+ u32 handle; /* In */
+ u32 reserved;
+ u64 address; /* In */
+ u32 len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_launch_update_vmsa - LAUNCH_UPDATE_VMSA command
+ *
+ * @handle: handle of the VM
+ * @address: physical address of memory region to encrypt
+ * @len: len of memory region to encrypt
+ */
+struct sev_data_launch_update_vmsa {
+ u32 handle; /* In */
+ u32 reserved;
+ u64 address; /* In */
+ u32 len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_launch_measure - LAUNCH_MEASURE command parameters
+ *
+ * @handle: handle of the VM to process
+ * @address: physical address containing the measurement blob
+ * @len: len of measurement blob
+ */
+struct sev_data_launch_measure {
+ u32 handle; /* In */
+ u32 reserved;
+ u64 address; /* In */
+ u32 len; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_data_launch_secret - LAUNCH_SECRET command parameters
+ *
+ * @handle: handle of the VM to process
+ * @hdr_address: physical address containing the packet header
+ * @hdr_len: len of packet header
+ * @guest_address: system physical address of guest memory region
+ * @guest_len: len of guest_paddr
+ * @trans_address: physical address of transport memory buffer
+ * @trans_len: len of transport memory buffer
+ */
+struct sev_data_launch_secret {
+ u32 handle; /* In */
+ u32 reserved1;
+ u64 hdr_address; /* In */
+ u32 hdr_len; /* In */
+ u32 reserved2;
+ u64 guest_address; /* In */
+ u32 guest_len; /* In */
+ u32 reserved3;
+ u64 trans_address; /* In */
+ u32 trans_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_launch_finish - LAUNCH_FINISH command parameters
+ *
+ * @handle: handle of the VM to process
+ */
+struct sev_data_launch_finish {
+ u32 handle; /* In */
+} __packed;
+
+/**
+ * struct sev_data_send_start - SEND_START command parameters
+ *
+ * @handle: handle of the VM to process
+ * @policy: policy information for the VM
+ * @pdh_cert_address: physical address containing PDH certificate
+ * @pdh_cert_len: len of PDH certificate
+ * @plat_certs_address: physical address containing platform certificate
+ * @plat_certs_len: len of platform certificate
+ * @amd_certs_address: physical address containing AMD certificate
+ * @amd_certs_len: len of AMD certificate
+ * @session_address: physical address containing Session data
+ * @session_len: len of session data
+ */
+struct sev_data_send_start {
+ u32 handle; /* In */
+ u32 policy; /* Out */
+ u64 pdh_cert_address; /* In */
+ u32 pdh_cert_len; /* In */
+ u32 reserved1;
+ u64 plat_cert_address; /* In */
+ u32 plat_cert_len; /* In */
+ u32 reserved2;
+ u64 amd_cert_address; /* In */
+ u32 amd_cert_len; /* In */
+ u32 reserved3;
+ u64 session_address; /* In */
+ u32 session_len; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_data_send_update - SEND_UPDATE_DATA command
+ *
+ * @handle: handle of the VM to process
+ * @hdr_address: physical address containing packet header
+ * @hdr_len: len of packet header
+ * @guest_address: physical address of guest memory region to send
+ * @guest_len: len of guest memory region to send
+ * @trans_address: physical address of host memory region
+ * @trans_len: len of host memory region
+ */
+struct sev_data_send_update_data {
+ u32 handle; /* In */
+ u32 reserved1;
+ u64 hdr_address; /* In */
+ u32 hdr_len; /* In/Out */
+ u32 reserved2;
+ u64 guest_address; /* In */
+ u32 guest_len; /* In */
+ u32 reserved3;
+ u64 trans_address; /* In */
+ u32 trans_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_send_update - SEND_UPDATE_VMSA command
+ *
+ * @handle: handle of the VM to process
+ * @hdr_address: physical address containing packet header
+ * @hdr_len: len of packet header
+ * @guest_address: physical address of guest memory region to send
+ * @guest_len: len of guest memory region to send
+ * @trans_address: physical address of host memory region
+ * @trans_len: len of host memory region
+ */
+struct sev_data_send_update_vmsa {
+ u32 handle; /* In */
+ u64 hdr_address; /* In */
+ u32 hdr_len; /* In/Out */
+ u32 reserved2;
+ u64 guest_address; /* In */
+ u32 guest_len; /* In */
+ u32 reserved3;
+ u64 trans_address; /* In */
+ u32 trans_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_send_finish - SEND_FINISH command parameters
+ *
+ * @handle: handle of the VM to process
+ */
+struct sev_data_send_finish {
+ u32 handle; /* In */
+} __packed;
+
+/**
+ * struct sev_data_receive_start - RECEIVE_START command parameters
+ *
+ * @handle: handle of the VM to perform receive operation
+ * @pdh_cert_address: system physical address containing PDH certificate blob
+ * @pdh_cert_len: len of PDH certificate blob
+ * @session_address: system physical address containing session blob
+ * @session_len: len of session blob
+ */
+struct sev_data_receive_start {
+ u32 handle; /* In/Out */
+ u32 policy; /* In */
+ u64 pdh_cert_address; /* In */
+ u32 pdh_cert_len; /* In */
+ u32 reserved1;
+ u64 session_address; /* In */
+ u32 session_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_receive_update_data - RECEIVE_UPDATE_DATA command parameters
+ *
+ * @handle: handle of the VM to update
+ * @hdr_address: physical address containing packet header blob
+ * @hdr_len: len of packet header
+ * @guest_address: system physical address of guest memory region
+ * @guest_len: len of guest memory region
+ * @trans_address: system physical address of transport buffer
+ * @trans_len: len of transport buffer
+ */
+struct sev_data_receive_update_data {
+ u32 handle; /* In */
+ u32 reserved1;
+ u64 hdr_address; /* In */
+ u32 hdr_len; /* In */
+ u32 reserved2;
+ u64 guest_address; /* In */
+ u32 guest_len; /* In */
+ u32 reserved3;
+ u64 trans_address; /* In */
+ u32 trans_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_receive_update_vmsa - RECEIVE_UPDATE_VMSA command parameters
+ *
+ * @handle: handle of the VM to update
+ * @hdr_address: physical address containing packet header blob
+ * @hdr_len: len of packet header
+ * @guest_address: system physical address of guest memory region
+ * @guest_len: len of guest memory region
+ * @trans_address: system physical address of transport buffer
+ * @trans_len: len of transport buffer
+ */
+struct sev_data_receive_update_vmsa {
+ u32 handle; /* In */
+ u32 reserved1;
+ u64 hdr_address; /* In */
+ u32 hdr_len; /* In */
+ u32 reserved2;
+ u64 guest_address; /* In */
+ u32 guest_len; /* In */
+ u32 reserved3;
+ u64 trans_address; /* In */
+ u32 trans_len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_receive_finish - RECEIVE_FINISH command parameters
+ *
+ * @handle: handle of the VM to finish
+ */
+struct sev_data_receive_finish {
+ u32 handle; /* In */
+} __packed;
+
+/**
+ * struct sev_data_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters
+ *
+ * @handle: handle of the VM to perform debug operation
+ * @src_addr: source address of data to operate on
+ * @dst_addr: destination address of data to operate on
+ * @len: len of data to operate on
+ */
+struct sev_data_dbg {
+ u32 handle; /* In */
+ u32 reserved;
+ u64 src_addr; /* In */
+ u64 dst_addr; /* In */
+ u32 len; /* In */
+} __packed;
+
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+
+/**
+ * sev_platform_init - perform SEV INIT command
+ *
+ * @error: SEV command return code
+ *
+ * Returns:
+ * 0 if the SEV successfully processed the command
+ * -%ENODEV if the SEV device is not available
+ * -%ENOTSUPP if the SEV does not support SEV
+ * -%ETIMEDOUT if the SEV command timed out
+ * -%EIO if the SEV returned a non-zero return code
+ */
+int sev_platform_init(int *error);
+
+/**
+ * sev_platform_status - perform SEV PLATFORM_STATUS command
+ *
+ * @status: sev_user_data_status structure to be processed
+ * @error: SEV command return code
+ *
+ * Returns:
+ * 0 if the SEV successfully processed the command
+ * -%ENODEV if the SEV device is not available
+ * -%ENOTSUPP if the SEV does not support SEV
+ * -%ETIMEDOUT if the SEV command timed out
+ * -%EIO if the SEV returned a non-zero return code
+ */
+int sev_platform_status(struct sev_user_data_status *status, int *error);
+
+/**
+ * sev_issue_cmd_external_user - issue SEV command by other driver with a file
+ * handle.
+ *
+ * This function can be used by other drivers to issue a SEV command on
+ * behalf of userspace. The caller must pass a valid SEV file descriptor
+ * so that we know that it has access to SEV device.
+ *
+ * @filep - SEV device file pointer
+ * @cmd - command to issue
+ * @data - command buffer
+ * @error: SEV command return code
+ *
+ * Returns:
+ * 0 if the SEV successfully processed the command
+ * -%ENODEV if the SEV device is not available
+ * -%ENOTSUPP if the SEV does not support SEV
+ * -%ETIMEDOUT if the SEV command timed out
+ * -%EIO if the SEV returned a non-zero return code
+ * -%EINVAL if the SEV file descriptor is not valid
+ */
+int sev_issue_cmd_external_user(struct file *filep, unsigned int id,
+ void *data, int *error);
+
+/**
+ * sev_guest_deactivate - perform SEV DEACTIVATE command
+ *
+ * @deactivate: sev_data_deactivate structure to be processed
+ * @sev_ret: sev command return code
+ *
+ * Returns:
+ * 0 if the sev successfully processed the command
+ * -%ENODEV if the sev device is not available
+ * -%ENOTSUPP if the sev does not support SEV
+ * -%ETIMEDOUT if the sev command timed out
+ * -%EIO if the sev returned a non-zero return code
+ */
+int sev_guest_deactivate(struct sev_data_deactivate *data, int *error);
+
+/**
+ * sev_guest_activate - perform SEV ACTIVATE command
+ *
+ * @activate: sev_data_activate structure to be processed
+ * @sev_ret: sev command return code
+ *
+ * Returns:
+ * 0 if the sev successfully processed the command
+ * -%ENODEV if the sev device is not available
+ * -%ENOTSUPP if the sev does not support SEV
+ * -%ETIMEDOUT if the sev command timed out
+ * -%EIO if the sev returned a non-zero return code
+ */
+int sev_guest_activate(struct sev_data_activate *data, int *error);
+
+/**
+ * sev_guest_df_flush - perform SEV DF_FLUSH command
+ *
+ * @sev_ret: sev command return code
+ *
+ * Returns:
+ * 0 if the sev successfully processed the command
+ * -%ENODEV if the sev device is not available
+ * -%ENOTSUPP if the sev does not support SEV
+ * -%ETIMEDOUT if the sev command timed out
+ * -%EIO if the sev returned a non-zero return code
+ */
+int sev_guest_df_flush(int *error);
+
+/**
+ * sev_guest_decommission - perform SEV DECOMMISSION command
+ *
+ * @decommission: sev_data_decommission structure to be processed
+ * @sev_ret: sev command return code
+ *
+ * Returns:
+ * 0 if the sev successfully processed the command
+ * -%ENODEV if the sev device is not available
+ * -%ENOTSUPP if the sev does not support SEV
+ * -%ETIMEDOUT if the sev command timed out
+ * -%EIO if the sev returned a non-zero return code
+ */
+int sev_guest_decommission(struct sev_data_decommission *data, int *error);
+
+void *psp_copy_user_blob(u64 __user uaddr, u32 len);
+
+#else /* !CONFIG_CRYPTO_DEV_SP_PSP */
+
+static inline int
+sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; }
+
+static inline int sev_platform_init(int *error) { return -ENODEV; }
+
+static inline int
+sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; }
+
+static inline int
+sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; }
+
+static inline int
+sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; }
+
+static inline int sev_guest_df_flush(int *error) { return -ENODEV; }
+
+static inline int
+sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int *error) { return -ENODEV; }
+
+static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); }
+
+#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
+
+#endif /* __PSP_SEV_H__ */
diff --git a/include/linux/scif.h b/include/linux/scif.h
index 7046111b8d0a..eeb250b73c4b 100644
--- a/include/linux/scif.h
+++ b/include/linux/scif.h
@@ -1266,8 +1266,8 @@ int scif_put_pages(struct scif_range *pages);
* events is a bitmask specifying the events which the application is
* interested in. The field revents is an output parameter, filled by the
* kernel with the events that actually occurred. The bits returned in revents
- * can include any of those specified in events, or one of the values POLLERR,
- * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events
+ * can include any of those specified in events, or one of the values EPOLLERR,
+ * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events
* field, and will be set in the revents field whenever the corresponding
* condition is true.)
*
@@ -1279,20 +1279,20 @@ int scif_put_pages(struct scif_range *pages);
* timeout means an infinite timeout.
*
* The following bits may be set in events and returned in revents.
- * POLLIN - Data may be received without blocking. For a connected
+ * EPOLLIN - Data may be received without blocking. For a connected
* endpoint, this means that scif_recv() may be called without blocking. For a
* listening endpoint, this means that scif_accept() may be called without
* blocking.
- * POLLOUT - Data may be sent without blocking. For a connected endpoint, this
- * means that scif_send() may be called without blocking. POLLOUT may also be
+ * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this
+ * means that scif_send() may be called without blocking. EPOLLOUT may also be
* used to block waiting for a non-blocking connect to complete. This bit value
* has no meaning for a listening endpoint and is ignored if specified.
*
* The following bits are only returned in revents, and are ignored if set in
* events.
- * POLLERR - An error occurred on the endpoint
- * POLLHUP - The connection to the peer endpoint was disconnected
- * POLLNVAL - The specified endpoint descriptor is invalid.
+ * EPOLLERR - An error occurred on the endpoint
+ * EPOLLHUP - The connection to the peer endpoint was disconnected
+ * EPOLLNVAL - The specified endpoint descriptor is invalid.
*
* Return:
* Upon successful completion, scif_poll() returns a non-negative value. A
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index aa16c064294f..5b6c541e4e1b 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -443,7 +443,7 @@ struct vb2_buf_ops {
* @fileio_read_once: report EOF after reading the first buffer
* @fileio_write_immediately: queue buffer after each write() call
* @allow_zero_bytesused: allow bytesused == 0 to be passed to the driver
- * @quirk_poll_must_check_waiting_for_buffers: Return %POLLERR at poll when QBUF
+ * @quirk_poll_must_check_waiting_for_buffers: Return %EPOLLERR at poll when QBUF
* has not been called. This is a vb1 idiom that has been adopted
* also by vb2.
* @lock: pointer to a mutex that protects the &struct vb2_queue. The
@@ -493,7 +493,7 @@ struct vb2_buf_ops {
* @error: a fatal error occurred on the queue
* @waiting_for_buffers: used in poll() to check if vb2 is still waiting for
* buffers. Only set for capture queues if qbuf has not yet been
- * called since poll() needs to return %POLLERR in that situation.
+ * called since poll() needs to return %EPOLLERR in that situation.
* @is_multiplanar: set if buffer type is multiplanar
* @is_output: set if buffer type is output
* @copy_timestamp: set if vb2-core should set timestamps
@@ -869,7 +869,7 @@ void vb2_core_queue_release(struct vb2_queue *q);
* @q: pointer to &struct vb2_queue with videobuf2 queue.
*
* Flag that a fatal unrecoverable error has occurred and wake up all processes
- * waiting on the queue. Polling will now set %POLLERR and queuing and dequeuing
+ * waiting on the queue. Polling will now set %EPOLLERR and queuing and dequeuing
* buffers will return %-EIO.
*
* The error flag will be cleared when canceling the queue, either from
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 5a54c9570977..500f81375200 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int inet_ctl_sock_create(struct sock **sk, unsigned short family,
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 6692d67e9245..c1a93ce35e62 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -310,7 +310,7 @@ void inet_csk_prepare_forced_close(struct sock *sk);
static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
{
return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
- (POLLIN | POLLRDNORM) : 0;
+ (EPOLLIN | EPOLLRDNORM) : 0;
}
int inet_csk_listen_start(struct sock *sk, int backlog);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8606c9113d3f..7a98cd583c73 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1056,7 +1056,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/sock.h b/include/net/sock.h
index 169c92afcafa..3aa7b7d6e6c7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1584,7 +1584,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
-int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
+int sock_no_getname(struct socket *, struct sockaddr *, int);
__poll_t sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h
index 639fade14b23..41b509f410bf 100644
--- a/include/uapi/asm-generic/poll.h
+++ b/include/uapi/asm-generic/poll.h
@@ -3,50 +3,36 @@
#define __ASM_GENERIC_POLL_H
/* These are specified by iBCS2 */
-#define POLLIN (__force __poll_t)0x0001
-#define POLLPRI (__force __poll_t)0x0002
-#define POLLOUT (__force __poll_t)0x0004
-#define POLLERR (__force __poll_t)0x0008
-#define POLLHUP (__force __poll_t)0x0010
-#define POLLNVAL (__force __poll_t)0x0020
+#define POLLIN 0x0001
+#define POLLPRI 0x0002
+#define POLLOUT 0x0004
+#define POLLERR 0x0008
+#define POLLHUP 0x0010
+#define POLLNVAL 0x0020
/* The rest seem to be more-or-less nonstandard. Check them! */
-#define POLLRDNORM (__force __poll_t)0x0040
-#define POLLRDBAND (__force __poll_t)0x0080
+#define POLLRDNORM 0x0040
+#define POLLRDBAND 0x0080
#ifndef POLLWRNORM
-#define POLLWRNORM (__force __poll_t)0x0100
+#define POLLWRNORM 0x0100
#endif
#ifndef POLLWRBAND
-#define POLLWRBAND (__force __poll_t)0x0200
+#define POLLWRBAND 0x0200
#endif
#ifndef POLLMSG
-#define POLLMSG (__force __poll_t)0x0400
+#define POLLMSG 0x0400
#endif
#ifndef POLLREMOVE
-#define POLLREMOVE (__force __poll_t)0x1000
+#define POLLREMOVE 0x1000
#endif
#ifndef POLLRDHUP
-#define POLLRDHUP (__force __poll_t)0x2000
+#define POLLRDHUP 0x2000
#endif
#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */
#define POLL_BUSY_LOOP (__force __poll_t)0x8000
-#ifdef __KERNEL__
-#ifndef __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- return (__force __u16)val;
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- return (__force __poll_t)v;
-}
-#endif
-#endif
-
struct pollfd {
int fd;
short events;
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 63e21be30f15..bf48e71f2634 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -28,20 +28,21 @@
#define EPOLL_CTL_MOD 3
/* Epoll event masks */
-#define EPOLLIN 0x00000001
-#define EPOLLPRI 0x00000002
-#define EPOLLOUT 0x00000004
-#define EPOLLERR 0x00000008
-#define EPOLLHUP 0x00000010
-#define EPOLLRDNORM 0x00000040
-#define EPOLLRDBAND 0x00000080
-#define EPOLLWRNORM 0x00000100
-#define EPOLLWRBAND 0x00000200
-#define EPOLLMSG 0x00000400
-#define EPOLLRDHUP 0x00002000
+#define EPOLLIN (__force __poll_t)0x00000001
+#define EPOLLPRI (__force __poll_t)0x00000002
+#define EPOLLOUT (__force __poll_t)0x00000004
+#define EPOLLERR (__force __poll_t)0x00000008
+#define EPOLLHUP (__force __poll_t)0x00000010
+#define EPOLLNVAL (__force __poll_t)0x00000020
+#define EPOLLRDNORM (__force __poll_t)0x00000040
+#define EPOLLRDBAND (__force __poll_t)0x00000080
+#define EPOLLWRNORM (__force __poll_t)0x00000100
+#define EPOLLWRBAND (__force __poll_t)0x00000200
+#define EPOLLMSG (__force __poll_t)0x00000400
+#define EPOLLRDHUP (__force __poll_t)0x00002000
/* Set exclusive wakeup mode for the target file descriptor */
-#define EPOLLEXCLUSIVE (1U << 28)
+#define EPOLLEXCLUSIVE (__force __poll_t)(1U << 28)
/*
* Request the handling of system wakeup events so as to prevent system suspends
@@ -53,13 +54,13 @@
*
* Requires CAP_BLOCK_SUSPEND
*/
-#define EPOLLWAKEUP (1U << 29)
+#define EPOLLWAKEUP (__force __poll_t)(1U << 29)
/* Set the One Shot behaviour for the target file descriptor */
-#define EPOLLONESHOT (1U << 30)
+#define EPOLLONESHOT (__force __poll_t)(1U << 30)
/* Set the Edge Triggered behaviour for the target file descriptor */
-#define EPOLLET (1U << 31)
+#define EPOLLET (__force __poll_t)(1U << 31)
/*
* On x86-64 make the 64bit structure have the same alignment as the
@@ -74,7 +75,7 @@
#endif
struct epoll_event {
- __u32 events;
+ __poll_t events;
__u64 data;
} EPOLL_PACKED;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8fb90a0819c3..0fb5ef939732 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1362,6 +1362,96 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_S390_CMMA_MIGRATION */
#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+ __u64 addr;
+ __u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+};
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
new file mode 100644
index 000000000000..3d77fe91239a
--- /dev/null
+++ b/include/uapi/linux/psp-sev.h
@@ -0,0 +1,142 @@
+/*
+ * Userspace interface for AMD Secure Encrypted Virtualization (SEV)
+ * platform management commands.
+ *
+ * Copyright (C) 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <[email protected]>
+ *
+ * SEV spec 0.14 is available at:
+ * http://support.amd.com/TechDocs/55766_SEV-KM%20API_Specification.pdf
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PSP_SEV_USER_H__
+#define __PSP_SEV_USER_H__
+
+#include <linux/types.h>
+
+/**
+ * SEV platform commands
+ */
+enum {
+ SEV_FACTORY_RESET = 0,
+ SEV_PLATFORM_STATUS,
+ SEV_PEK_GEN,
+ SEV_PEK_CSR,
+ SEV_PDH_GEN,
+ SEV_PDH_CERT_EXPORT,
+ SEV_PEK_CERT_IMPORT,
+
+ SEV_MAX,
+};
+
+/**
+ * SEV Firmware status code
+ */
+typedef enum {
+ SEV_RET_SUCCESS = 0,
+ SEV_RET_INVALID_PLATFORM_STATE,
+ SEV_RET_INVALID_GUEST_STATE,
+ SEV_RET_INAVLID_CONFIG,
+ SEV_RET_INVALID_len,
+ SEV_RET_ALREADY_OWNED,
+ SEV_RET_INVALID_CERTIFICATE,
+ SEV_RET_POLICY_FAILURE,
+ SEV_RET_INACTIVE,
+ SEV_RET_INVALID_ADDRESS,
+ SEV_RET_BAD_SIGNATURE,
+ SEV_RET_BAD_MEASUREMENT,
+ SEV_RET_ASID_OWNED,
+ SEV_RET_INVALID_ASID,
+ SEV_RET_WBINVD_REQUIRED,
+ SEV_RET_DFFLUSH_REQUIRED,
+ SEV_RET_INVALID_GUEST,
+ SEV_RET_INVALID_COMMAND,
+ SEV_RET_ACTIVE,
+ SEV_RET_HWSEV_RET_PLATFORM,
+ SEV_RET_HWSEV_RET_UNSAFE,
+ SEV_RET_UNSUPPORTED,
+ SEV_RET_MAX,
+} sev_ret_code;
+
+/**
+ * struct sev_user_data_status - PLATFORM_STATUS command parameters
+ *
+ * @major: major API version
+ * @minor: minor API version
+ * @state: platform state
+ * @flags: platform config flags
+ * @build: firmware build id for API version
+ * @guest_count: number of active guests
+ */
+struct sev_user_data_status {
+ __u8 api_major; /* Out */
+ __u8 api_minor; /* Out */
+ __u8 state; /* Out */
+ __u32 flags; /* Out */
+ __u8 build; /* Out */
+ __u32 guest_count; /* Out */
+} __packed;
+
+/**
+ * struct sev_user_data_pek_csr - PEK_CSR command parameters
+ *
+ * @address: PEK certificate chain
+ * @length: length of certificate
+ */
+struct sev_user_data_pek_csr {
+ __u64 address; /* In */
+ __u32 length; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_user_data_cert_import - PEK_CERT_IMPORT command parameters
+ *
+ * @pek_address: PEK certificate chain
+ * @pek_len: length of PEK certificate
+ * @oca_address: OCA certificate chain
+ * @oca_len: length of OCA certificate
+ */
+struct sev_user_data_pek_cert_import {
+ __u64 pek_cert_address; /* In */
+ __u32 pek_cert_len; /* In */
+ __u64 oca_cert_address; /* In */
+ __u32 oca_cert_len; /* In */
+} __packed;
+
+/**
+ * struct sev_user_data_pdh_cert_export - PDH_CERT_EXPORT command parameters
+ *
+ * @pdh_address: PDH certificate address
+ * @pdh_len: length of PDH certificate
+ * @cert_chain_address: PDH certificate chain
+ * @cert_chain_len: length of PDH certificate chain
+ */
+struct sev_user_data_pdh_cert_export {
+ __u64 pdh_cert_address; /* In */
+ __u32 pdh_cert_len; /* In/Out */
+ __u64 cert_chain_address; /* In */
+ __u32 cert_chain_len; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_issue_cmd - SEV ioctl parameters
+ *
+ * @cmd: SEV commands to execute
+ * @opaque: pointer to the command structure
+ * @error: SEV FW return code on failure
+ */
+struct sev_issue_cmd {
+ __u32 cmd; /* In */
+ __u64 data; /* In */
+ __u32 error; /* Out */
+} __packed;
+
+#define SEV_IOC_TYPE 'S'
+#define SEV_ISSUE_CMD _IOWR(SEV_IOC_TYPE, 0x0, struct sev_issue_cmd)
+
+#endif /* __PSP_USER_SEV_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 360e564ae7d1..d7f309f74dec 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -578,10 +578,10 @@ static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *po
spin_lock(&info->lock);
if (info->attr.mq_curmsgs)
- retval = POLLIN | POLLRDNORM;
+ retval = EPOLLIN | EPOLLRDNORM;
if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
- retval |= POLLOUT | POLLWRNORM;
+ retval |= EPOLLOUT | EPOLLWRNORM;
spin_unlock(&info->lock);
return retval;
diff --git a/kernel/configs/kvm_guest.config b/kernel/configs/kvm_guest.config
index 8d9643767142..108fecc20fc1 100644
--- a/kernel/configs/kvm_guest.config
+++ b/kernel/configs/kvm_guest.config
@@ -18,6 +18,7 @@ CONFIG_VIRTUALIZATION=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_KVM_GUEST=y
+CONFIG_S390_GUEST=y
CONFIG_VIRTIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BLK=y
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f0549e79978b..96db9ae5d5af 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4524,7 +4524,7 @@ static __poll_t perf_poll(struct file *file, poll_table *wait)
{
struct perf_event *event = file->private_data;
struct ring_buffer *rb;
- __poll_t events = POLLHUP;
+ __poll_t events = EPOLLHUP;
poll_wait(file, &event->waitq, wait);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 141aa2ca8728..6c6b3c48db71 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -19,7 +19,7 @@
static void perf_output_wakeup(struct perf_output_handle *handle)
{
- atomic_set(&handle->rb->poll, POLLIN);
+ atomic_set(&handle->rb->poll, EPOLLIN);
handle->event->pending_wakeup = 1;
irq_work_queue(&handle->event->pending);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index db4b9b8929eb..fc1123583fa6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -930,7 +930,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
__poll_t ret = 0;
if (!user)
- return POLLERR|POLLNVAL;
+ return EPOLLERR|EPOLLNVAL;
poll_wait(file, &log_wait, wait);
@@ -938,9 +938,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
if (user->seq < log_next_seq) {
/* return error when data has vanished underneath us */
if (user->seq < log_first_seq)
- ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
+ ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
else
- ret = POLLIN|POLLRDNORM;
+ ret = EPOLLIN|EPOLLRDNORM;
}
logbuf_unlock_irq();
diff --git a/kernel/relay.c b/kernel/relay.c
index f7f40a6e6352..c3029402f15c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -924,12 +924,12 @@ static __poll_t relay_file_poll(struct file *filp, poll_table *wait)
struct rchan_buf *buf = filp->private_data;
if (buf->finalized)
- return POLLERR;
+ return EPOLLERR;
if (filp->f_mode & FMODE_READ) {
poll_wait(filp, &buf->read_wait, wait);
if (!relay_buf_empty(buf))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
return mask;
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 94ad46d50b56..fe56c4e06c51 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -74,7 +74,7 @@ static __poll_t posix_clock_poll(struct file *fp, poll_table *wait)
__poll_t result = 0;
if (!clk)
- return POLLERR;
+ return EPOLLERR;
if (clk->ops.poll)
result = clk->ops.poll(clk, fp, wait);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ca6930e0d25e..dcf1c4dd3efe 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -627,7 +627,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*
- * Returns POLLIN | POLLRDNORM if data exists in the buffers,
+ * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
* zero otherwise.
*/
__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
@@ -665,7 +665,7 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 56608538a4ad..20a2300ae4e8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5623,13 +5623,13 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (tr->trace_flags & TRACE_ITER_BLOCK)
/*
* Always select as readable when in blocking mode
*/
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
else
return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
filp, poll_table);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 13b35ffa021e..670e99b68aa6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3688,7 +3688,7 @@ static void memcg_event_remove(struct work_struct *work)
}
/*
- * Gets called on POLLHUP on eventfd when user closes it.
+ * Gets called on EPOLLHUP on eventfd when user closes it.
*
* Called with wqh->lock held and interrupts disabled.
*/
@@ -3700,7 +3700,7 @@ static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode,
struct mem_cgroup *memcg = event->memcg;
__poll_t flags = key_to_poll(key);
- if (flags & POLLHUP) {
+ if (flags & EPOLLHUP) {
/*
* If the event has been detached at cgroup removal, we
* can simply return knowing the other side will cleanup
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 42fe5653814a..c7a33717d079 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2705,10 +2705,10 @@ static __poll_t swaps_poll(struct file *file, poll_table *wait)
if (seq->poll_event != atomic_read(&proc_poll_event)) {
seq->poll_event = atomic_read(&proc_poll_event);
- return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
+ return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI;
}
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
/* iterator */
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d6f7f7cb79c4..0cfba919d167 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -240,7 +240,7 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
if (!ts) {
if (err)
*err = -EREMOTEIO;
- return POLLERR;
+ return EPOLLERR;
}
if (!ts->rd->f_op->poll)
@@ -253,7 +253,7 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
n = DEFAULT_POLLMASK;
else
n = ts->wr->f_op->poll(ts->wr, pt);
- ret = (ret & ~POLLOUT) | (n & ~POLLIN);
+ ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
}
return ret;
@@ -396,11 +396,11 @@ end_clear:
if (!list_empty(&m->req_list)) {
if (test_and_clear_bit(Rpending, &m->wsched))
- n = POLLIN;
+ n = EPOLLIN;
else
n = p9_fd_poll(m->client, NULL, NULL);
- if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
+ if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
schedule_work(&m->rq);
}
@@ -505,11 +505,11 @@ end_clear:
if (m->wsize || !list_empty(&m->unsent_req_list)) {
if (test_and_clear_bit(Wpending, &m->wsched))
- n = POLLOUT;
+ n = EPOLLOUT;
else
n = p9_fd_poll(m->client, NULL, NULL);
- if ((n & POLLOUT) &&
+ if ((n & EPOLLOUT) &&
!test_and_set_bit(Wworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
schedule_work(&m->wq);
@@ -599,12 +599,12 @@ static void p9_conn_create(struct p9_client *client)
init_poll_funcptr(&m->pt, p9_pollwait);
n = p9_fd_poll(client, &m->pt, NULL);
- if (n & POLLIN) {
+ if (n & EPOLLIN) {
p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
set_bit(Rpending, &m->wsched);
}
- if (n & POLLOUT) {
+ if (n & EPOLLOUT) {
p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
set_bit(Wpending, &m->wsched);
}
@@ -625,12 +625,12 @@ static void p9_poll_mux(struct p9_conn *m)
return;
n = p9_fd_poll(m->client, NULL, &err);
- if (n & (POLLERR | POLLHUP | POLLNVAL)) {
+ if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
p9_conn_cancel(m, err);
}
- if (n & POLLIN) {
+ if (n & EPOLLIN) {
set_bit(Rpending, &m->wsched);
p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
if (!test_and_set_bit(Rworksched, &m->wsched)) {
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
}
}
- if (n & POLLOUT) {
+ if (n & EPOLLOUT) {
set_bit(Wpending, &m->wsched);
p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
@@ -678,11 +678,11 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
spin_unlock(&client->lock);
if (test_and_clear_bit(Wpending, &m->wsched))
- n = POLLOUT;
+ n = EPOLLOUT;
else
n = p9_fd_poll(m->client, NULL, NULL);
- if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+ if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
schedule_work(&m->wq);
return 0;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
* fields into the sockaddr.
*/
static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_at sat;
struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
if (atalk_autobind(sk) < 0)
goto out;
- *uaddr_len = sizeof(struct sockaddr_at);
memset(&sat, 0, sizeof(sat));
if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
sat.sat_port = at->src_port;
}
- err = 0;
sat.sat_family = AF_APPLETALK;
memcpy(uaddr, &sat, sizeof(sat));
+ err = sizeof(struct sockaddr_at);
out:
release_sock(sk);
diff --git a/net/atm/common.c b/net/atm/common.c
index 6523f38c4957..fc78a0508ae1 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -661,15 +661,15 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
/* exceptional events */
if (sk->sk_err)
- mask = POLLERR;
+ mask = EPOLLERR;
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* writable? */
if (sock->state == SS_CONNECTING &&
@@ -678,7 +678,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
if (vcc->qos.txtp.traffic_class != ATM_NONE &&
vcc_writable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
}
static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmpvc *addr;
struct atm_vcc *vcc = ATM_SD(sock);
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
- *sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
addr->sap_addr.vci = vcc->vci;
- return 0;
+ return sizeof(struct sockaddr_atmpvc);
}
static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
}
static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmsvc *addr;
- *sockaddr_len = sizeof(struct sockaddr_atmsvc);
addr = (struct sockaddr_atmsvc *) sockaddr;
memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
sizeof(struct sockaddr_atmsvc));
- return 0;
+ return sizeof(struct sockaddr_atmsvc);
}
int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..c8319ed48485 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
}
static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_digipeater[0] = null_ax25_address;
}
}
- *uaddr_len = sizeof (struct full_sockaddr_ax25);
+ err = sizeof (struct full_sockaddr_ax25);
out:
release_sock(sk);
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 581375d0eed2..e91f29c7c638 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -304,7 +304,7 @@ static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
poll_wait(file, &socket_client->queue_wait, wait);
if (socket_client->queue_len > 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 9be74a44e99d..dc9fa37ddd14 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -193,7 +193,7 @@ static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
poll_wait(file, &debug_log->queue_wait, wait);
if (!batadv_log_empty(debug_log))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f897681780db..84d92a077834 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -431,7 +431,7 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
if (sk->sk_state == BT_CONNECTED ||
(test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
sk->sk_state == BT_CONNECT2))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
@@ -451,20 +451,20 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
return bt_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == BT_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_state == BT_CONNECT ||
sk->sk_state == BT_CONNECT2 ||
@@ -472,7 +472,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
return mask;
if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
}
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
goto done;
}
- *addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
haddr->hci_channel= hci_pi(sk)->channel;
+ err = sizeof(*haddr);
done:
release_sock(sk);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
}
static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_l2);
la->l2_psm = chan->psm;
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
la->l2_bdaddr_type = chan->src_type;
}
- return 0;
+ return sizeof(struct sockaddr_l2);
}
static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..93a3b219db09 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -533,7 +533,7 @@ done:
return err;
}
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
@@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
else
bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
- *len = sizeof(struct sockaddr_rc);
- return 0;
+ return sizeof(struct sockaddr_rc);
}
static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
}
static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_sco);
if (peer)
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
- return 0;
+ return sizeof(struct sockaddr_sco);
}
static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index b109445a1df9..a6fb1b3bcad9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -924,7 +924,7 @@ static int caif_release(struct socket *sock)
caif_disconnect_client(sock_net(sk), &cf_sk->layer);
cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
- wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
+ wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP);
sock_orphan(sk);
sk_stream_kill_queues(&cf_sk->sk);
@@ -946,23 +946,23 @@ static __poll_t caif_poll(struct file *file,
/* exceptional events? */
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= EPOLLRDHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/*
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
addr->can_family = AF_CAN;
addr->can_ifindex = ro->ifindex;
- *len = sizeof(*addr);
-
- return 0;
+ return sizeof(*addr);
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7d9293940b5..9938952c5c78 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -75,7 +75,7 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i
/*
* Avoid a wakeup if event not interesting for us
*/
- if (key && !(key_to_poll(key) & (POLLIN | POLLERR)))
+ if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@@ -842,22 +842,22 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
if (sk->sk_state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
@@ -865,7 +865,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
/* writable? */
if (sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index b026e1717df4..04e5e27c9b81 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1274,7 +1274,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
@@ -2497,7 +2498,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
@@ -2619,7 +2620,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_poll(&wq->wait, POLLERR);
+ wake_up_interruptible_poll(&wq->wait, EPOLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
rcu_read_unlock();
}
@@ -2631,8 +2632,8 @@ static void sock_def_readable(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
+ EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@@ -2649,8 +2650,8 @@ static void sock_def_write_space(struct sock *sk)
if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
diff --git a/net/core/stream.c b/net/core/stream.c
index 1cff9c6270c6..7d329fb1f553 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -38,8 +38,8 @@ void sk_stream_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 74685fecfdb9..15bdc002d90c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -338,21 +338,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
mask = 0;
if (sk->sk_err)
- mask = POLLERR;
+ mask = EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected? */
if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
if (atomic_read(&sk->sk_rmem_alloc) > 0)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -362,7 +362,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
* IO signal will be lost.
*/
if (sk_stream_is_writeable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index cc1b505453a8..45cb5bea884b 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
{
struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- *uaddr_len = sizeof(struct sockaddr_dn);
-
lock_sock(sk);
if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_dn);
}
@@ -1216,7 +1214,7 @@ static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wai
__poll_t mask = datagram_poll(file, sock, wait);
if (!skb_queue_empty(&scp->other_receive_queue))
- mask |= POLLRDBAND;
+ mask |= EPOLLRDBAND;
return mask;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c24008daa3d8..f98e2f0db841 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept);
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_addr.s_addr = addr;
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
@@ -828,7 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
case TCP_CLOSE:
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
- POLLHUP, even on eg. unconnected UDP sockets -- RR */
+ EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
/* fall through */
default:
sk->sk_shutdown |= how;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c059aa7df0a9..48636aee23c3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -512,36 +512,36 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask = 0;
/*
- * POLLHUP is certainly not done right. But poll() doesn't
+ * EPOLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
* socket the read side is more interesting.
*
- * Some poll() documentation says that POLLHUP is incompatible
- * with the POLLOUT/POLLWR flags, so somebody should check this
+ * Some poll() documentation says that EPOLLHUP is incompatible
+ * with the EPOLLOUT/POLLWR flags, so somebody should check this
* all. But careful, it tends to be safer to return too many
* bits than too few, and you can easily break real applications
* if you don't tell them that something has hung up!
*
* Check-me.
*
- * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and
+ * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
* our fs/select.c). It means that after we received EOF,
* poll always returns immediately, making impossible poll() on write()
- * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
+ * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
* if and only if shutdown has been made in both directions.
* Actually, it is interesting to look how Solaris and DUX
- * solve this dilemma. I would prefer, if POLLHUP were maskable,
+ * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
* then we could set it on SND_SHUTDOWN. BTW examples given
* in Stevens' books assume exactly this behaviour, it explains
- * why POLLHUP is incompatible with POLLOUT. --ANK
+ * why EPOLLHUP is incompatible with EPOLLOUT. --ANK
*
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
*/
if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected or passive Fast Open socket? */
if (state != TCP_SYN_SENT &&
@@ -554,11 +554,11 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
target++;
if (tp->rcv_nxt - tp->copied_seq >= target)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -570,24 +570,24 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
smp_mb__after_atomic();
if (sk_stream_is_writeable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
} else
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (tp->urg_data & TCP_URG_VALID)
- mask |= POLLPRI;
+ mask |= EPOLLPRI;
} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
/* Active TCP fastopen socket with defer_connect
- * Return POLLOUT so application can call write()
+ * Return EPOLLOUT so application can call write()
* in order for kernel to generate SYN+data
*/
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= EPOLLERR;
return mask;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cfa51cfd2d99..575d3c1fb6e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -315,7 +315,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
/* Fast Recovery (RFC 5681 3.2) :
* Cubic needs 1.7 factor, rounded to 2 to include
- * extra cushion (application might react slowly to POLLOUT)
+ * extra cushion (application might react slowly to EPOLLOUT)
*/
sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
sndmem *= nr_segs * per_mss;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f81f969f9c06..bfaefe560b5c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2501,12 +2501,12 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
- if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+ if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
- mask &= ~(POLLIN | POLLRDNORM);
+ mask &= ~(EPOLLIN | EPOLLRDNORM);
return mask;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..c1e292db04db 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 64331158d693..81ce15ffb878 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -989,14 +989,13 @@ done:
}
static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
addr->sa_family = AF_IUCV;
- *len = sizeof(struct sockaddr_iucv);
if (peer) {
memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
@@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
- return 0;
+ return sizeof(struct sockaddr_iucv);
}
/**
@@ -1483,7 +1482,7 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
sk = (struct sock *) isk;
if (sk->sk_state == IUCV_CONNECTED)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
@@ -1501,27 +1500,27 @@ __poll_t iucv_sock_poll(struct file *file, struct socket *sock,
return iucv_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= EPOLLRDHUP;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == IUCV_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_state == IUCV_DISCONN)
- mask |= POLLIN;
+ mask |= EPOLLIN;
if (sock_writeable(sk) && iucv_below_msglim(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 4a8d407f8902..f297d53a11aa 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -396,8 +396,8 @@ static int kcm_read_sock_done(struct strparser *strp, int err)
static void psock_state_change(struct sock *sk)
{
- /* TCP only does a POLLIN for a half close. Do a POLLHUP here
- * since application will normally not poll with POLLIN
+ /* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here
+ * since application will normally not poll with EPOLLIN
* on the TCP sockets.
*/
@@ -1338,7 +1338,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
/* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
* we set sk_state, otherwise epoll_wait always returns right away with
- * POLLHUP
+ * EPOLLHUP
*/
kcm->sk.sk_state = TCP_ESTABLISHED;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ff61124fdf59..4614585e1720 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -349,7 +349,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
}
static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -370,8 +370,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_conn_id = lsk->conn_id;
lsa->l2tp_addr.s_addr = addr;
}
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 192344688c06..efea58b66295 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -421,7 +421,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
}
static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
struct sock *sk = sock->sk;
@@ -449,8 +449,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
}
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = sk->sk_bound_dev_if;
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 59f246d7b290..99a03c72db4f 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -870,7 +870,7 @@ err:
/* getname() support.
*/
static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = 0;
int error = 0;
@@ -969,8 +969,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
}
- *usockaddr_len = len;
- error = 0;
+ error = len;
sock_put(sk);
end:
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c38d16f22d2a..01dcc0823d1f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -971,7 +971,7 @@ release:
* Return the address information of a socket.
*/
static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddrlen, int peer)
+ int peer)
{
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
@@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
- *uaddrlen = sizeof(sllc);
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
@@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
IFHWADDRLEN);
}
}
- rc = 0;
sllc.sllc_family = AF_LLC;
memcpy(uaddr, &sllc, sizeof(sllc));
+ rc = sizeof(sllc);
out:
release_sock(sk);
return rc;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2ad445c1d27c..3c8af14330b5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1105,7 +1105,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
}
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1113,7 +1113,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_family = AF_NETLINK;
nladdr->nl_pad = 0;
- *addr_len = sizeof(*nladdr);
if (peer) {
nladdr->nl_pid = nlk->dst_portid;
@@ -1124,7 +1123,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
}
- return 0;
+ return sizeof(*nladdr);
}
static int netlink_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 9ba30c63be3d..35bb6807927f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -829,11 +829,12 @@ out_release:
}
static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
+ int uaddr_len;
memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
@@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
sax->fsa_ax25.sax25_call = nr->user_addr;
memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
sax->fsa_digipeater[0] = nr->dest_addr;
- *uaddr_len = sizeof(struct full_sockaddr_ax25);
+ uaddr_len = sizeof(struct full_sockaddr_ax25);
} else {
sax->fsa_ax25.sax25_family = AF_NETROM;
sax->fsa_ax25.sax25_ndigis = 0;
sax->fsa_ax25.sax25_call = nr->source_addr;
- *uaddr_len = sizeof(struct sockaddr_ax25);
+ uaddr_len = sizeof(struct sockaddr_ax25);
}
release_sock(sk);
- return 0;
+ return uaddr_len;
}
int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 985909f105eb..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -497,7 +497,7 @@ error:
}
static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
llcp_sock->dsap, llcp_sock->ssap);
memset(llcp_addr, 0, sizeof(*llcp_addr));
- *len = sizeof(struct sockaddr_nfc_llcp);
lock_sock(sk);
if (!llcp_sock->dev) {
@@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
llcp_addr->service_name_len);
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_nfc_llcp);
}
static inline __poll_t llcp_accept_poll(struct sock *parent)
@@ -543,7 +542,7 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
sk = &llcp_sock->sk;
if (sk->sk_state == LLCP_CONNECTED)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
@@ -563,23 +562,23 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
return llcp_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == LLCP_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1d1483007e46..616cb9c18f88 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3409,7 +3409,7 @@ out:
}
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
@@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
- *uaddr_len = sizeof(*uaddr);
- return 0;
+ return sizeof(*uaddr);
}
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
@@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_halen = 0;
}
rcu_read_unlock();
- *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
- return 0;
+ return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
@@ -4085,7 +4083,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock,
if (po->rx_ring.pg_vec) {
if (!packet_previous_rx_frame(po, &po->rx_ring,
TP_STATUS_KERNEL))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
po->pressure = 0;
@@ -4093,7 +4091,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock,
spin_lock_bh(&sk->sk_write_queue.lock);
if (po->tx_ring.pg_vec) {
if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
spin_unlock_bh(&sk->sk_write_queue.lock);
return mask;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 08f6751d2030..f9b40e6a18a5 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
}
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
@@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
pn->sobject);
- *sockaddr_len = sizeof(struct sockaddr_pn);
- return 0;
+ return sizeof(struct sockaddr_pn);
}
static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
@@ -351,18 +350,18 @@ static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_CLOSE)
- return POLLERR;
+ return EPOLLERR;
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!skb_queue_empty(&pn->ctrlreq_queue))
- mask |= POLLPRI;
+ mask |= EPOLLPRI;
if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
- return POLLHUP;
+ return EPOLLHUP;
if (sk->sk_state == TCP_ESTABLISHED &&
refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
atomic_read(&pn->tx_credits))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5fb3929e3d7d..b33e5aeb4c06 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
}
static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sockaddr_qrtr qaddr;
@@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
}
release_sock(sk);
- *len = sizeof(qaddr);
qaddr.sq_family = AF_QIPCRTR;
memcpy(saddr, &qaddr, sizeof(qaddr));
- return 0;
+ return sizeof(qaddr);
}
static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 88aa8ad0f5b6..0a8eefd256b3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -110,7 +110,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
}
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
@@ -131,23 +131,22 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_family = AF_INET;
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
/*
* RDS' poll is without a doubt the least intuitive part of the interface,
- * as POLLIN and POLLOUT do not behave entirely as you would expect from
+ * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
* a network protocol.
*
- * POLLIN is asserted if
+ * EPOLLIN is asserted if
* - there is data on the receive queue.
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
* update, or a RDMA completion).
*
- * POLLOUT is asserted if there is room on the send queue. This does not mean
+ * EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
* to send to a congested destination, the system call may still fail (and
* return ENOBUFS).
@@ -167,22 +166,22 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
read_lock_irqsave(&rs->rs_recv_lock, flags);
if (!rs->rs_cong_monitor) {
- /* When a congestion map was updated, we signal POLLIN for
+ /* When a congestion map was updated, we signal EPOLLIN for
* "historical" reasons. Applications can also poll for
* WRBAND instead. */
if (rds_cong_updated_since(&rs->rs_cong_track))
- mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
+ mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND);
} else {
spin_lock(&rs->rs_lock);
if (rs->rs_cong_notify)
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
!list_empty(&rs->rs_notify_queue))
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
- mask |= (POLLOUT | POLLWRNORM);
+ mask |= (EPOLLOUT | EPOLLWRNORM);
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 44c4652721af..08230a145042 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
struct rds_tcp_connection *tc;
unsigned long flags;
struct sockaddr_in sin;
- int sinlen;
struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
sock = tc->t_sock;
if (sock) {
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 0);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
tsinfo.local_addr = sin.sin_addr.s_addr;
tsinfo.local_port = sin.sin_port;
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 1);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
tsinfo.peer_addr = sin.sin_addr.s_addr;
tsinfo.peer_port = sin.sin_port;
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 124c77e9d058..59d0eb960275 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1142,13 +1142,13 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
{
struct rfkill_data *data = file->private_data;
- __poll_t res = POLLOUT | POLLWRNORM;
+ __poll_t res = EPOLLOUT | EPOLLWRNORM;
poll_wait(file, &data->read_wait, wait);
mutex_lock(&data->mtx);
if (!list_empty(&data->events))
- res = POLLIN | POLLRDNORM;
+ res = EPOLLIN | EPOLLRDNORM;
mutex_unlock(&data->mtx);
return res;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 083bd251406f..5170373b797c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -938,7 +938,7 @@ out_release:
}
static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
struct sock *sk = sock->sk;
@@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
srose->srose_digis[n] = rose->source_digis[n];
}
- *uaddr_len = sizeof(struct full_sockaddr_rose);
- return 0;
+ return sizeof(struct full_sockaddr_rose);
}
int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 21ad6a3a465c..0c9c18aa7c77 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -742,13 +742,13 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
/* the socket is readable if there are any messages waiting on the Rx
* queue */
if (!list_empty(&rx->recvmsg_q))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* the socket is writable if there is space to add new data to the
* socket; there is no guarantee that any particular call in progress
* on the socket may have space in the Tx ACK window */
if (rxrpc_writable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e35d4f73d2df..0d873c58e516 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
/* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
int rc;
- rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+ rc = inet6_getname(sock, uaddr, peer);
- if (rc != 0)
+ if (rc < 0)
return rc;
- *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+ rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
(union sctp_addr *)uaddr);
return rc;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ebb8cb9eb0bd..bf271f8c2dc9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7602,22 +7602,22 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
return (!list_empty(&sp->ep->asocs)) ?
- (POLLIN | POLLRDNORM) : 0;
+ (EPOLLIN | EPOLLRDNORM) : 0;
mask = 0;
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* Is it readable? Reconsider this code with TCP-style support. */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* The association is either gone or not ready. */
if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
@@ -7625,7 +7625,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is it writable? */
if (sctp_writeable(sk)) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/*
@@ -7637,7 +7637,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
* in the following code to cover it as well.
*/
if (sctp_writeable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
}
@@ -8161,8 +8161,8 @@ void sctp_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+ EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 3583c8ab1bae..38ae22b65e77 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -281,7 +281,6 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
struct in_device *in_dev;
struct sockaddr_in addr;
int rc = -ENOENT;
- int len;
if (!dst) {
rc = -ENOTCONN;
@@ -293,7 +292,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
}
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
+ kernel_getsockname(clcsock, (struct sockaddr *)&addr);
/* analyze IPv4 specific data of net_device belonging to TCP socket */
rcu_read_lock();
in_dev = __in_dev_get_rcu(dst->dev);
@@ -771,7 +770,7 @@ static void smc_listen_work(struct work_struct *work)
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
- int rc = 0, len;
+ int rc = 0;
__be32 subnet;
u8 prefix_len;
u8 ibport;
@@ -824,7 +823,7 @@ static void smc_listen_work(struct work_struct *work)
}
/* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
+ kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
@@ -1075,7 +1074,7 @@ out:
}
static int smc_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct smc_sock *smc;
@@ -1085,7 +1084,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
smc = smc_sk(sock->sk);
- return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+ return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
@@ -1141,11 +1140,11 @@ out:
static __poll_t smc_accept_poll(struct sock *parent)
{
struct smc_sock *isk = smc_sk(parent);
- int mask = 0;
+ __poll_t mask = 0;
spin_lock(&isk->accept_q_lock);
if (!list_empty(&isk->accept_q))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
spin_unlock(&isk->accept_q_lock);
return mask;
@@ -1160,7 +1159,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
int rc;
if (!sk)
- return POLLNVAL;
+ return EPOLLNVAL;
smc = smc_sk(sock->sk);
sock_hold(sk);
@@ -1171,16 +1170,16 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
- if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
+ if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) {
sk->sk_err = smc->clcsock->sk->sk_err;
if (sk->sk_err) {
- mask |= POLLERR;
+ mask |= EPOLLERR;
} else {
rc = smc_connect_rdma(smc);
if (rc < 0)
- mask |= POLLERR;
+ mask |= EPOLLERR;
/* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
} else {
@@ -1190,27 +1189,27 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
lock_sock(sk);
}
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_state == SMC_LISTEN) {
/* woken up by sk_data_ready in smc_listen_work() */
mask = smc_accept_poll(sk);
} else {
if (atomic_read(&smc->conn.sndbuf_space) ||
sk->sk_shutdown & SEND_SHUTDOWN) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
}
if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ mask |= EPOLLIN;
}
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 9dc392ca06bf..eff4e0d0bb31 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -35,8 +35,8 @@ static void smc_rx_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
+ EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 838bce20c361..72f004c9c9b1 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -46,8 +46,8 @@ static void smc_tx_write_space(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait,
- POLLOUT | POLLWRNORM |
- POLLWRBAND);
+ EPOLLOUT | EPOLLWRNORM |
+ EPOLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
diff --git a/net/socket.c b/net/socket.c
index a93c99b518ca..fac8246a8ae8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
goto out_fd;
if (upeer_sockaddr) {
- if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
- &len, 2) < 0) {
+ len = newsock->ops->getname(newsock,
+ (struct sockaddr *)&address, 2);
+ if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
}
@@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
@@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
- if (err)
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ if (err < 0)
goto out_put;
- err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
out_put:
fput_light(sock->file, fput_needed);
@@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
@@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
return err;
}
- err =
- sock->ops->getname(sock, (struct sockaddr *)&address, &len,
- 1);
- if (!err)
- err = move_addr_to_user(&address, len, usockaddr,
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ if (err >= 0)
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr,
usockaddr_len);
fput_light(sock->file, fput_needed);
}
@@ -3166,17 +3167,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
}
EXPORT_SYMBOL(kernel_connect);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 0);
+ return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 1);
+ return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index aa36dad32db1..8a7e1c774f9c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -940,7 +940,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
poll_wait(filp, &queue_wait, wait);
/* alway allow write */
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
if (!rp)
return mask;
@@ -950,7 +950,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
for (cq= &rp->q; &cq->list != &cd->queue;
cq = list_entry(cq->list.next, struct cache_queue, list))
if (!cq->reader) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
break;
}
spin_unlock(&queue_lock);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6e432ecd7f99..806395687bb6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
- struct sockaddr *buf, int buflen)
+ struct sockaddr *buf)
{
struct socket *sock;
int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
goto out_release;
}
- err = kernel_getsockname(sock, buf, &buflen);
+ err = kernel_getsockname(sock, buf);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
rcu_read_unlock();
rpc_set_port(sap, 0);
- err = rpc_sockname(net, sap, salen, buf, buflen);
+ err = rpc_sockname(net, sap, salen, buf);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5c4330325787..fc97fc3ed637 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -345,15 +345,15 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
{
struct inode *inode = file_inode(filp);
struct rpc_inode *rpci = RPC_I(inode);
- __poll_t mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
poll_wait(filp, &rpci->waitq, wait);
inode_lock(inode);
if (rpci->pipe == NULL)
- mask |= POLLERR | POLLHUP;
+ mask |= EPOLLERR | EPOLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
inode_unlock(inode);
return mask;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 943f2a745cd5..08cd951aaeea 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- err = kernel_getpeername(newsock, sin, &slen);
+ err = kernel_getpeername(newsock, sin);
if (err < 0) {
net_warn_ratelimited("%s: peername failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
+ slen = err;
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
if (IS_ERR(newsvsk))
goto failed;
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
- err = kernel_getsockname(newsock, sin, &slen);
+ err = kernel_getsockname(newsock, sin);
+ slen = err;
if (unlikely(err < 0)) {
dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
slen = offsetof(struct sockaddr, sa_data);
@@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
err = PTR_ERR(svsk);
goto out;
}
- if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+ salen = kernel_getsockname(svsk->sk_sock, sin);
+ if (salen >= 0)
svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
if (error < 0)
goto bummer;
- newlen = len;
- error = kernel_getsockname(sock, newsin, &newlen);
+ error = kernel_getsockname(sock, newsin);
if (error < 0)
goto bummer;
+ newlen = error;
if (protocol == IPPROTO_TCP) {
if ((error = kernel_listen(sock, 64)) < 0)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a6b8c1f8f92a..956e29c1438d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
- int buflen;
unsigned short port = 0;
- if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+ if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
goto out;
switch (buf.ss_family) {
case AF_INET6:
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 163f3a547501..f93477187a90 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -665,7 +665,7 @@ exit:
* a completely predictable manner).
*/
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct sock *sk = sock->sk;
@@ -684,13 +684,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
addr->addr.id.node = tn->own_addr;
}
- *uaddr_len = sizeof(*addr);
addr->addrtype = TIPC_ADDR_ID;
addr->family = AF_TIPC;
addr->scope = 0;
addr->addr.name.domain = 0;
- return 0;
+ return sizeof(*addr);
}
/**
@@ -721,31 +720,31 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- revents |= POLLRDHUP | POLLIN | POLLRDNORM;
+ revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- revents |= POLLHUP;
+ revents |= EPOLLHUP;
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
- revents |= POLLOUT;
+ revents |= EPOLLOUT;
/* fall thru' */
case TIPC_LISTEN:
if (!skb_queue_empty(&sk->sk_receive_queue))
- revents |= POLLIN | POLLRDNORM;
+ revents |= EPOLLIN | EPOLLRDNORM;
break;
case TIPC_OPEN:
if (tsk->group_is_open && !tsk->cong_link_cnt)
- revents |= POLLOUT;
+ revents |= EPOLLOUT;
if (!tipc_sk_type_connectionless(sk))
break;
if (skb_queue_empty(&sk->sk_receive_queue))
break;
- revents |= POLLIN | POLLRDNORM;
+ revents |= EPOLLIN | EPOLLRDNORM;
break;
case TIPC_DISCONNECTING:
- revents = POLLIN | POLLRDNORM | POLLHUP;
+ revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
break;
}
return revents;
@@ -1897,8 +1896,8 @@ static void tipc_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
rcu_read_unlock();
}
@@ -1914,8 +1913,8 @@ static void tipc_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+ EPOLLRDNORM | EPOLLRDBAND);
rcu_read_unlock();
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0214acbd6bff..723698416242 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -415,9 +415,9 @@ static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
{
unix_dgram_peer_wake_disconnect(sk, other);
wake_up_interruptible_poll(sk_sleep(sk),
- POLLOUT |
- POLLWRNORM |
- POLLWRBAND);
+ EPOLLOUT |
+ EPOLLWRNORM |
+ EPOLLWRBAND);
}
/* preconditions:
@@ -454,7 +454,7 @@ static void unix_write_space(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait,
- POLLOUT | POLLWRNORM | POLLWRBAND);
+ EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
@@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
-static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
@@ -1453,7 +1453,7 @@ out:
}
-static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
@@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
if (!u->addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
- *uaddr_len = sizeof(short);
+ err = sizeof(short);
} else {
struct unix_address *addr = u->addr;
- *uaddr_len = addr->len;
- memcpy(sunaddr, addr->name, *uaddr_len);
+ err = addr->len;
+ memcpy(sunaddr, addr->name, addr->len);
}
unix_state_unlock(sk);
sock_put(sk);
@@ -2129,8 +2129,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
if (wq_has_sleeper(&u->peer_wait))
wake_up_interruptible_sync_poll(&u->peer_wait,
- POLLOUT | POLLWRNORM |
- POLLWRBAND);
+ EPOLLOUT | EPOLLWRNORM |
+ EPOLLWRBAND);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
@@ -2650,27 +2650,27 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* exceptional events? */
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
sk->sk_state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/*
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
if (unix_writable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
@@ -2687,29 +2687,29 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (sk->sk_type == SOCK_SEQPACKET) {
if (sk->sk_state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
}
/* No write status requested, avoid expensive OUT tests. */
- if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
+ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
writable = unix_writable(sk);
@@ -2726,7 +2726,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
}
if (writable)
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9d95e773f4c8..aac9b8f6552e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
static int vsock_getname(struct socket *sock,
- struct sockaddr *addr, int *addr_len, int peer)
+ struct sockaddr *addr, int peer)
{
int err;
struct sock *sk;
@@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock,
*/
BUILD_BUG_ON(sizeof(*vm_addr) > 128);
memcpy(addr, vm_addr, sizeof(*vm_addr));
- *addr_len = sizeof(*vm_addr);
+ err = sizeof(*vm_addr);
out:
release_sock(sk);
@@ -865,20 +865,20 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
if (sk->sk_err)
/* Signify that there has been an error on this socket. */
- mask |= POLLERR;
+ mask |= EPOLLERR;
/* INET sockets treat local write shutdown and peer write shutdown as a
- * case of POLLHUP set.
+ * case of EPOLLHUP set.
*/
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
((sk->sk_shutdown & SEND_SHUTDOWN) &&
(vsk->peer_shutdown & SEND_SHUTDOWN))) {
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
}
if (sk->sk_shutdown & RCV_SHUTDOWN ||
vsk->peer_shutdown & SEND_SHUTDOWN) {
- mask |= POLLRDHUP;
+ mask |= EPOLLRDHUP;
}
if (sock->type == SOCK_DGRAM) {
@@ -888,11 +888,11 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
*/
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN)) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
} else if (sock->type == SOCK_STREAM) {
lock_sock(sk);
@@ -902,7 +902,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
*/
if (sk->sk_state == TCP_LISTEN
&& !vsock_is_accept_queue_empty(sk))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* If there is something in the queue then we can read. */
if (transport->stream_is_active(vsk) &&
@@ -911,10 +911,10 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
int ret = transport->notify_poll_in(
vsk, 1, &data_ready_now);
if (ret < 0) {
- mask |= POLLERR;
+ mask |= EPOLLERR;
} else {
if (data_ready_now)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
}
@@ -925,7 +925,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
*/
if (sk->sk_shutdown & RCV_SHUTDOWN ||
vsk->peer_shutdown & SEND_SHUTDOWN) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
/* Connected sockets that can produce data can be written. */
@@ -935,25 +935,25 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
int ret = transport->notify_poll_out(
vsk, 1, &space_avail_now);
if (ret < 0) {
- mask |= POLLERR;
+ mask |= EPOLLERR;
} else {
if (space_avail_now)
- /* Remove POLLWRBAND since INET
+ /* Remove EPOLLWRBAND since INET
* sockets are not setting it.
*/
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
}
/* Simulate INET socket poll behaviors, which sets
- * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
+ * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 562cc11131f6..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -896,7 +896,7 @@ out:
}
static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
struct sock *sk = sock->sk;
@@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
sx25->sx25_addr = x25->source_addr;
sx25->sx25_family = AF_X25;
- *uaddr_len = sizeof(*sx25);
+ rc = sizeof(*sx25);
out:
return rc;
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 4d202b73a0e1..a9428daa69f3 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -580,7 +580,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)
mutex_lock_nested(&rev->ns->lock, rev->ns->level);
poll_wait(file, &rev->ns->wait, pt);
if (rev->last_read < rev->ns->revision)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
mutex_unlock(&rev->ns->lock);
}
diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
index 558e3076d38c..479b03a7a17e 100644
--- a/security/tomoyo/audit.c
+++ b/security/tomoyo/audit.c
@@ -456,14 +456,14 @@ void tomoyo_read_log(struct tomoyo_io_buffer *head)
* @file: Pointer to "struct file".
* @wait: Pointer to "poll_table". Maybe NULL.
*
- * Returns POLLIN | POLLRDNORM when ready to read an audit log.
+ * Returns EPOLLIN | EPOLLRDNORM when ready to read an audit log.
*/
__poll_t tomoyo_poll_log(struct file *file, poll_table *wait)
{
if (tomoyo_log_count)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
poll_wait(file, &tomoyo_log_wait, wait);
if (tomoyo_log_count)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 70c73bf66c88..03923a138ef5 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2116,17 +2116,17 @@ static struct tomoyo_domain_info *tomoyo_find_domain_by_qid
* @file: Pointer to "struct file".
* @wait: Pointer to "poll_table".
*
- * Returns POLLIN | POLLRDNORM when ready to read, 0 otherwise.
+ * Returns EPOLLIN | EPOLLRDNORM when ready to read, 0 otherwise.
*
* Waits for access requests which violated policy in enforcing mode.
*/
static __poll_t tomoyo_poll_query(struct file *file, poll_table *wait)
{
if (!list_empty(&tomoyo_query_list))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
poll_wait(file, &tomoyo_query_wait, wait);
if (!list_empty(&tomoyo_query_list))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
@@ -2450,15 +2450,15 @@ int tomoyo_open_control(const u8 type, struct file *file)
* @file: Pointer to "struct file".
* @wait: Pointer to "poll_table". Maybe NULL.
*
- * Returns POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM if ready to read/write,
- * POLLOUT | POLLWRNORM otherwise.
+ * Returns EPOLLIN | EPOLLRDNORM | EPOLLOUT | EPOLLWRNORM if ready to read/write,
+ * EPOLLOUT | EPOLLWRNORM otherwise.
*/
__poll_t tomoyo_poll_control(struct file *file, poll_table *wait)
{
struct tomoyo_io_buffer *head = file->private_data;
if (head->poll)
- return head->poll(file, wait) | POLLOUT | POLLWRNORM;
- return POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM;
+ return head->poll(file, wait) | EPOLLOUT | EPOLLWRNORM;
+ return EPOLLIN | EPOLLRDNORM | EPOLLOUT | EPOLLWRNORM;
}
/**
diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c
index cd6932e5225c..9094f4b3b367 100644
--- a/security/tomoyo/network.c
+++ b/security/tomoyo/network.c
@@ -655,10 +655,11 @@ int tomoyo_socket_listen_permission(struct socket *sock)
return 0;
{
const int error = sock->ops->getname(sock, (struct sockaddr *)
- &addr, &addr_len, 0);
+ &addr, 0);
- if (error)
+ if (error < 0)
return error;
+ addr_len = error;
}
address.protocol = type;
address.operation = TOMOYO_NETWORK_LISTEN;
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index fb9bf99deb35..1d3d7e7a1f05 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -154,8 +154,8 @@ static int tomoyo_release(struct inode *inode, struct file *file)
* @file: Pointer to "struct file".
* @wait: Pointer to "poll_table". Maybe NULL.
*
- * Returns POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM if ready to read/write,
- * POLLOUT | POLLWRNORM otherwise.
+ * Returns EPOLLIN | EPOLLRDNORM | EPOLLOUT | EPOLLWRNORM if ready to read/write,
+ * EPOLLOUT | EPOLLWRNORM otherwise.
*/
static __poll_t tomoyo_poll(struct file *file, poll_table *wait)
{
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index a12b9555e910..4563432badba 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -399,9 +399,9 @@ static int snd_compr_mmap(struct file *f, struct vm_area_struct *vma)
static __poll_t snd_compr_get_poll(struct snd_compr_stream *stream)
{
if (stream->direction == SND_COMPRESS_PLAYBACK)
- return POLLOUT | POLLWRNORM;
+ return EPOLLOUT | EPOLLWRNORM;
else
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
static __poll_t snd_compr_poll(struct file *f, poll_table *wait)
@@ -412,7 +412,7 @@ static __poll_t snd_compr_poll(struct file *f, poll_table *wait)
__poll_t retval = 0;
if (snd_BUG_ON(!data))
- return POLLERR;
+ return EPOLLERR;
stream = &data->stream;
@@ -421,7 +421,7 @@ static __poll_t snd_compr_poll(struct file *f, poll_table *wait)
switch (stream->runtime->state) {
case SNDRV_PCM_STATE_OPEN:
case SNDRV_PCM_STATE_XRUN:
- retval = snd_compr_get_poll(stream) | POLLERR;
+ retval = snd_compr_get_poll(stream) | EPOLLERR;
goto out;
default:
break;
@@ -447,7 +447,7 @@ static __poll_t snd_compr_poll(struct file *f, poll_table *wait)
retval = snd_compr_get_poll(stream);
break;
default:
- retval = snd_compr_get_poll(stream) | POLLERR;
+ retval = snd_compr_get_poll(stream) | EPOLLERR;
break;
}
out:
diff --git a/sound/core/control.c b/sound/core/control.c
index 50fa16022f1f..0b3026d937b1 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1679,7 +1679,7 @@ static __poll_t snd_ctl_poll(struct file *file, poll_table * wait)
mask = 0;
if (!list_empty(&ctl->events))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/sound/core/info.c b/sound/core/info.c
index aa86f3f8e056..4b36767af9e1 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -214,9 +214,9 @@ static __poll_t snd_info_entry_poll(struct file *file, poll_table *wait)
data->file_private_data,
file, wait);
if (entry->c.ops->read)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (entry->c.ops->write)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/sound/core/init.c b/sound/core/init.c
index 8753440c3a6e..4fa5dd955740 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -346,7 +346,7 @@ static int snd_disconnect_release(struct inode *inode, struct file *file)
static __poll_t snd_disconnect_poll(struct file * file, poll_table * wait)
{
- return POLLERR | POLLNVAL;
+ return EPOLLERR | EPOLLNVAL;
}
static long snd_disconnect_ioctl(struct file *file,
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 3ebba9c7f86e..b044c0a5a674 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -2705,7 +2705,7 @@ static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait)
if (runtime->status->state != SNDRV_PCM_STATE_DRAINING &&
(runtime->status->state != SNDRV_PCM_STATE_RUNNING ||
snd_pcm_oss_playback_ready(psubstream)))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
snd_pcm_stream_unlock_irq(psubstream);
}
if (csubstream != NULL) {
@@ -2715,7 +2715,7 @@ static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait)
snd_pcm_stream_lock_irq(csubstream);
if ((ostate = runtime->status->state) != SNDRV_PCM_STATE_RUNNING ||
snd_pcm_oss_capture_ready(csubstream))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
snd_pcm_stream_unlock_irq(csubstream);
if (ostate != SNDRV_PCM_STATE_RUNNING && runtime->oss.trigger) {
struct snd_pcm_oss_file ofile;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 51104df924e1..77ba50ddcf9e 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3147,7 +3147,7 @@ static __poll_t snd_pcm_playback_poll(struct file *file, poll_table * wait)
substream = pcm_file->substream;
if (PCM_RUNTIME_CHECK(substream))
- return POLLOUT | POLLWRNORM | POLLERR;
+ return EPOLLOUT | EPOLLWRNORM | EPOLLERR;
runtime = substream->runtime;
poll_wait(file, &runtime->sleep, wait);
@@ -3159,7 +3159,7 @@ static __poll_t snd_pcm_playback_poll(struct file *file, poll_table * wait)
case SNDRV_PCM_STATE_PREPARED:
case SNDRV_PCM_STATE_PAUSED:
if (avail >= runtime->control->avail_min) {
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
break;
}
/* Fall through */
@@ -3167,7 +3167,7 @@ static __poll_t snd_pcm_playback_poll(struct file *file, poll_table * wait)
mask = 0;
break;
default:
- mask = POLLOUT | POLLWRNORM | POLLERR;
+ mask = EPOLLOUT | EPOLLWRNORM | EPOLLERR;
break;
}
snd_pcm_stream_unlock_irq(substream);
@@ -3186,7 +3186,7 @@ static __poll_t snd_pcm_capture_poll(struct file *file, poll_table * wait)
substream = pcm_file->substream;
if (PCM_RUNTIME_CHECK(substream))
- return POLLIN | POLLRDNORM | POLLERR;
+ return EPOLLIN | EPOLLRDNORM | EPOLLERR;
runtime = substream->runtime;
poll_wait(file, &runtime->sleep, wait);
@@ -3198,19 +3198,19 @@ static __poll_t snd_pcm_capture_poll(struct file *file, poll_table * wait)
case SNDRV_PCM_STATE_PREPARED:
case SNDRV_PCM_STATE_PAUSED:
if (avail >= runtime->control->avail_min) {
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
break;
}
mask = 0;
break;
case SNDRV_PCM_STATE_DRAINING:
if (avail > 0) {
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
break;
}
/* Fall through */
default:
- mask = POLLIN | POLLRDNORM | POLLERR;
+ mask = EPOLLIN | EPOLLRDNORM | EPOLLERR;
break;
}
snd_pcm_stream_unlock_irq(substream);
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index fae21311723f..69616d00481c 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -1385,11 +1385,11 @@ static __poll_t snd_rawmidi_poll(struct file *file, poll_table * wait)
mask = 0;
if (rfile->input != NULL) {
if (snd_rawmidi_ready(rfile->input))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (rfile->output != NULL) {
if (snd_rawmidi_ready(rfile->output))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
}
diff --git a/sound/core/seq/oss/seq_oss_rw.c b/sound/core/seq/oss/seq_oss_rw.c
index c538e78ca310..30886f5fb100 100644
--- a/sound/core/seq/oss/seq_oss_rw.c
+++ b/sound/core/seq/oss/seq_oss_rw.c
@@ -204,13 +204,13 @@ snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wai
/* input */
if (dp->readq && is_read_mode(dp->file_mode)) {
if (snd_seq_oss_readq_poll(dp->readq, file, wait))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
/* output */
if (dp->writeq && is_write_mode(dp->file_mode)) {
if (snd_seq_kernel_client_write_poll(dp->cseq, file, wait))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
}
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index b611deef81f5..60db32785f62 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -1101,7 +1101,7 @@ static __poll_t snd_seq_poll(struct file *file, poll_table * wait)
/* check if data is available in the outqueue */
if (snd_seq_fifo_poll_wait(client->data.user.fifo, file, wait))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT) {
@@ -1109,7 +1109,7 @@ static __poll_t snd_seq_poll(struct file *file, poll_table * wait)
/* check if data is available in the pool */
if (!snd_seq_write_pool_allocated(client) ||
snd_seq_pool_poll_wait(client->pool, file, wait))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
diff --git a/sound/core/timer.c b/sound/core/timer.c
index da05e314917f..dc87728c5b74 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -2084,9 +2084,9 @@ static __poll_t snd_timer_user_poll(struct file *file, poll_table * wait)
mask = 0;
spin_lock_irq(&tu->qlock);
if (tu->qused)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (tu->disconnected)
- mask |= POLLERR;
+ mask |= EPOLLERR;
spin_unlock_irq(&tu->qlock);
return mask;
diff --git a/sound/firewire/bebob/bebob_hwdep.c b/sound/firewire/bebob/bebob_hwdep.c
index 83e791810c52..04c321e08c62 100644
--- a/sound/firewire/bebob/bebob_hwdep.c
+++ b/sound/firewire/bebob/bebob_hwdep.c
@@ -63,7 +63,7 @@ hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_table *wait)
spin_lock_irq(&bebob->lock);
if (bebob->dev_lock_changed)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&bebob->lock);
diff --git a/sound/firewire/dice/dice-hwdep.c b/sound/firewire/dice/dice-hwdep.c
index 7a8af0f91c96..6498bf6909ba 100644
--- a/sound/firewire/dice/dice-hwdep.c
+++ b/sound/firewire/dice/dice-hwdep.c
@@ -62,7 +62,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file,
spin_lock_irq(&dice->lock);
if (dice->dev_lock_changed || dice->notification_bits != 0)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&dice->lock);
diff --git a/sound/firewire/digi00x/digi00x-hwdep.c b/sound/firewire/digi00x/digi00x-hwdep.c
index a084c2a834db..426cd39e0233 100644
--- a/sound/firewire/digi00x/digi00x-hwdep.c
+++ b/sound/firewire/digi00x/digi00x-hwdep.c
@@ -70,7 +70,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file,
spin_lock_irq(&dg00x->lock);
if (dg00x->dev_lock_changed || dg00x->msg)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&dg00x->lock);
diff --git a/sound/firewire/fireface/ff-hwdep.c b/sound/firewire/fireface/ff-hwdep.c
index 68e273fa5d23..336c0076ec42 100644
--- a/sound/firewire/fireface/ff-hwdep.c
+++ b/sound/firewire/fireface/ff-hwdep.c
@@ -62,7 +62,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file,
spin_lock_irq(&ff->lock);
if (ff->dev_lock_changed)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&ff->lock);
diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
index e0eff9328ee1..5cac26ab20b7 100644
--- a/sound/firewire/fireworks/fireworks_hwdep.c
+++ b/sound/firewire/fireworks/fireworks_hwdep.c
@@ -194,12 +194,12 @@ hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_table *wait)
spin_lock_irq(&efw->lock);
if (efw->dev_lock_changed || efw->pull_ptr != efw->push_ptr)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&efw->lock);
- return events | POLLOUT;
+ return events | EPOLLOUT;
}
static int
diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c
index 7b6a086866e7..5f772eab588b 100644
--- a/sound/firewire/motu/motu-hwdep.c
+++ b/sound/firewire/motu/motu-hwdep.c
@@ -69,12 +69,12 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file,
spin_lock_irq(&motu->lock);
if (motu->dev_lock_changed || motu->msg)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&motu->lock);
- return events | POLLOUT;
+ return events | EPOLLOUT;
}
static int hwdep_get_info(struct snd_motu *motu, void __user *arg)
diff --git a/sound/firewire/oxfw/oxfw-hwdep.c b/sound/firewire/oxfw/oxfw-hwdep.c
index 6c1828aff672..50a1c03b42b9 100644
--- a/sound/firewire/oxfw/oxfw-hwdep.c
+++ b/sound/firewire/oxfw/oxfw-hwdep.c
@@ -62,7 +62,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file,
spin_lock_irq(&oxfw->lock);
if (oxfw->dev_lock_changed)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&oxfw->lock);
diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c
index 37b21647b471..4e4c1e9020e8 100644
--- a/sound/firewire/tascam/tascam-hwdep.c
+++ b/sound/firewire/tascam/tascam-hwdep.c
@@ -60,7 +60,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file,
spin_lock_irq(&tscm->lock);
if (tscm->dev_lock_changed)
- events = POLLIN | POLLRDNORM;
+ events = EPOLLIN | EPOLLRDNORM;
else
events = 0;
spin_unlock_irq(&tscm->lock);
diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c
index 6b57f8aac1b7..8c0f8a9ee0ba 100644
--- a/sound/oss/dmasound/dmasound_core.c
+++ b/sound/oss/dmasound/dmasound_core.c
@@ -684,7 +684,7 @@ static __poll_t sq_poll(struct file *file, struct poll_table_struct *wait)
poll_wait(file, &write_sq.action_queue, wait);
if (file->f_mode & FMODE_WRITE)
if (write_sq.count < write_sq.max_active || write_sq.block_size - write_sq.rear_size > 0)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 05ccc7fdcc09..56537a156580 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -246,7 +246,7 @@ static __poll_t snd_usb_sbrc_hwdep_poll(struct snd_hwdep *hw, struct file *file,
struct usb_mixer_interface *mixer = hw->private_data;
poll_wait(file, &mixer->rc_waitq, wait);
- return mixer->rc_code ? POLLIN | POLLRDNORM : 0;
+ return mixer->rc_code ? EPOLLIN | EPOLLRDNORM : 0;
}
static int snd_usb_soundblaster_remote_init(struct usb_mixer_interface *mixer)
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c
index e2be10d17118..ebcab5c5465d 100644
--- a/sound/usb/usx2y/us122l.c
+++ b/sound/usb/usx2y/us122l.c
@@ -280,7 +280,7 @@ static __poll_t usb_stream_hwdep_poll(struct snd_hwdep *hw,
poll_wait(file, &us122l->sk.sleep, wait);
- mask = POLLIN | POLLOUT | POLLWRNORM | POLLERR;
+ mask = EPOLLIN | EPOLLOUT | EPOLLWRNORM | EPOLLERR;
if (mutex_trylock(&us122l->mutex)) {
struct usb_stream *s = us122l->sk.s;
if (s && s->state == usb_stream_ready) {
@@ -290,7 +290,7 @@ static __poll_t usb_stream_hwdep_poll(struct snd_hwdep *hw,
polled = &us122l->second_periods_polled;
if (*polled != s->periods_done) {
*polled = s->periods_done;
- mask = POLLIN | POLLOUT | POLLWRNORM;
+ mask = EPOLLIN | EPOLLOUT | EPOLLWRNORM;
} else
mask = 0;
}
diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
index 07d15bae75bc..d8bd7c99b48c 100644
--- a/sound/usb/usx2y/usX2Yhwdep.c
+++ b/sound/usb/usx2y/usX2Yhwdep.c
@@ -92,12 +92,12 @@ static __poll_t snd_us428ctls_poll(struct snd_hwdep *hw, struct file *file, poll
struct usX2Ydev *us428 = hw->private_data;
struct us428ctls_sharedmem *shm = us428->us428ctls_sharedmem;
if (us428->chip_status & USX2Y_STAT_CHIP_HUP)
- return POLLHUP;
+ return EPOLLHUP;
poll_wait(file, &us428->us428ctls_wait_queue_head, wait);
if (shm != NULL && shm->CtlSnapShotLast != shm->CtlSnapShotRed)
- mask |= POLLIN;
+ mask |= EPOLLIN;
return mask;
}
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 70691c08e1ed..cca7e065a075 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -51,3 +51,6 @@ config KVM_COMPAT
config HAVE_KVM_IRQ_BYPASS
bool
+
+config HAVE_KVM_VCPU_ASYNC_IOCTL
+ bool
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index cc29a8148328..70268c0bec79 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,7 +92,6 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
struct arch_timer_context *vtimer;
- u32 cnt_ctl;
/*
* We may see a timer interrupt after vcpu_put() has been called which
@@ -104,15 +103,11 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
return IRQ_HANDLED;
vtimer = vcpu_vtimer(vcpu);
- if (!vtimer->irq.level) {
- cnt_ctl = read_sysreg_el0(cntv_ctl);
- cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
- ARCH_TIMER_CTRL_IT_MASK;
- if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
- kvm_timer_update_irq(vcpu, true, vtimer);
- }
+ if (kvm_timer_should_fire(vtimer))
+ kvm_timer_update_irq(vcpu, true, vtimer);
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ if (static_branch_unlikely(&userspace_irqchip_in_use) &&
+ unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_vtimer_update_mask_user(vcpu);
return IRQ_HANDLED;
@@ -238,6 +233,16 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
u64 cval, now;
+ if (timer_ctx->loaded) {
+ u32 cnt_ctl;
+
+ /* Only the virtual timer can be loaded so far */
+ cnt_ctl = read_sysreg_el0(cntv_ctl);
+ return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
+ (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
+ !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
+ }
+
if (!kvm_timer_irq_can_fire(timer_ctx))
return false;
@@ -252,15 +257,7 @@ bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- if (vtimer->irq.level || ptimer->irq.level)
- return true;
-
- /*
- * When this is called from withing the wait loop of kvm_vcpu_block(),
- * the software view of the timer state is up to date (timer->loaded
- * is false), and so we can simply check if the timer should fire now.
- */
- if (!vtimer->loaded && kvm_timer_should_fire(vtimer))
+ if (kvm_timer_should_fire(vtimer))
return true;
return kvm_timer_should_fire(ptimer);
@@ -278,9 +275,9 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
/* Populate the device bitmap with the timer states */
regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
KVM_ARM_DEV_EL1_PTIMER);
- if (vtimer->irq.level)
+ if (kvm_timer_should_fire(vtimer))
regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
- if (ptimer->irq.level)
+ if (kvm_timer_should_fire(ptimer))
regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}
@@ -293,7 +290,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
- if (likely(irqchip_in_kernel(vcpu->kvm))) {
+ if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
+ likely(irqchip_in_kernel(vcpu->kvm))) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_ctx->irq.level,
@@ -331,12 +329,20 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ bool level;
if (unlikely(!timer->enabled))
return;
- if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
- kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
+ /*
+ * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
+ * of its lifecycle is offloaded to the hardware, and we therefore may
+ * not have lowered the irq.level value before having to signal a new
+ * interrupt, but have to signal an interrupt every time the level is
+ * asserted.
+ */
+ level = kvm_timer_should_fire(vtimer);
+ kvm_timer_update_irq(vcpu, level, vtimer);
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
@@ -344,6 +350,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
phys_timer_emulate(vcpu);
}
+static void __timer_snapshot_state(struct arch_timer_context *timer)
+{
+ timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ timer->cnt_cval = read_sysreg_el0(cntv_cval);
+}
+
static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -355,10 +367,8 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
if (!vtimer->loaded)
goto out;
- if (timer->enabled) {
- vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
- }
+ if (timer->enabled)
+ __timer_snapshot_state(vtimer);
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
@@ -456,8 +466,7 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
bool phys_active;
int ret;
- phys_active = vtimer->irq.level ||
- kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+ phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
@@ -504,8 +513,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
- return vtimer->irq.level != vlevel ||
- ptimer->irq.level != plevel;
+ return kvm_timer_should_fire(vtimer) != vlevel ||
+ kvm_timer_should_fire(ptimer) != plevel;
}
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
@@ -537,54 +546,27 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
set_cntvoff(0);
}
-static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+/*
+ * With a userspace irqchip we have to check if the guest de-asserted the
+ * timer and if so, unmask the timer irq signal on the host interrupt
+ * controller to ensure that we see future timer signals.
+ */
+static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- kvm_vtimer_update_mask_user(vcpu);
- return;
- }
-
- /*
- * If the guest disabled the timer without acking the interrupt, then
- * we must make sure the physical and virtual active states are in
- * sync by deactivating the physical interrupt, because otherwise we
- * wouldn't see the next timer interrupt in the host.
- */
- if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
- int ret;
- ret = irq_set_irqchip_state(host_vtimer_irq,
- IRQCHIP_STATE_ACTIVE,
- false);
- WARN_ON(ret);
+ __timer_snapshot_state(vtimer);
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ kvm_vtimer_update_mask_user(vcpu);
+ }
}
}
-/**
- * kvm_timer_sync_hwstate - sync timer state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if any of the timers have expired while we were running in the guest,
- * and inject an interrupt if that was the case.
- */
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
- /*
- * If we entered the guest with the vtimer output asserted we have to
- * check if the guest has modified the timer so that we should lower
- * the line at this point.
- */
- if (vtimer->irq.level) {
- vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
- if (!kvm_timer_should_fire(vtimer)) {
- kvm_timer_update_irq(vcpu, false, vtimer);
- unmask_vtimer_irq(vcpu);
- }
- }
+ unmask_vtimer_irq_user(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -818,6 +800,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
return true;
}
+bool kvm_arch_timer_get_input_level(int vintid)
+{
+ struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
+ struct arch_timer_context *timer;
+
+ if (vintid == vcpu_vtimer(vcpu)->irq.irq)
+ timer = vcpu_vtimer(vcpu);
+ else
+ BUG(); /* We only map the vtimer so far */
+
+ return kvm_timer_should_fire(timer);
+}
+
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -839,7 +834,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
- ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);
+ ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
+ kvm_arch_timer_get_input_level);
if (ret)
return ret;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 7e3941f2ecde..86941f6181bb 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -71,17 +71,17 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
- BUG_ON(preemptible());
__this_cpu_write(kvm_arm_running_vcpu, vcpu);
}
+DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
/**
* kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
* Must be called from non-preemptible context
*/
struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
{
- BUG_ON(preemptible());
return __this_cpu_read(kvm_arm_running_vcpu);
}
@@ -295,6 +295,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ static_branch_dec(&userspace_irqchip_in_use);
+
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
@@ -381,17 +384,24 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ vcpu_load(vcpu);
+
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
@@ -400,10 +410,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu_power_off(vcpu);
break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
+ vcpu_put(vcpu);
+ return ret;
}
/**
@@ -524,14 +535,22 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
vcpu->arch.has_run_once = true;
- /*
- * Map the VGIC hardware resources before running a vcpu the first
- * time on this VM.
- */
- if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) {
- ret = kvm_vgic_map_resources(kvm);
- if (ret)
- return ret;
+ if (likely(irqchip_in_kernel(kvm))) {
+ /*
+ * Map the VGIC hardware resources before running a vcpu the
+ * first time on this VM.
+ */
+ if (unlikely(!vgic_ready(kvm))) {
+ ret = kvm_vgic_map_resources(kvm);
+ if (ret)
+ return ret;
+ }
+ } else {
+ /*
+ * Tell the rest of the code that there are userspace irqchip
+ * VMs in the wild.
+ */
+ static_branch_inc(&userspace_irqchip_in_use);
}
ret = kvm_timer_enable(vcpu);
@@ -619,21 +638,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
+ vcpu_load(vcpu);
+
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
- return ret;
+ goto out;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
- return ret;
- if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
- return 0;
+ goto out;
+ if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
+ ret = 0;
+ goto out;
+ }
}
- if (run->immediate_exit)
- return -EINTR;
+ if (run->immediate_exit) {
+ ret = -EINTR;
+ goto out;
+ }
kvm_sigset_activate(vcpu);
@@ -666,19 +691,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vgic_flush_hwstate(vcpu);
/*
- * If we have a singal pending, or need to notify a userspace
- * irqchip about timer or PMU level changes, then we exit (and
- * update the timer level state in kvm_timer_update_run
- * below).
+ * Exit if we have a signal pending so that we can deliver the
+ * signal to user space.
*/
- if (signal_pending(current) ||
- kvm_timer_should_notify_user(vcpu) ||
- kvm_pmu_should_notify_user(vcpu)) {
+ if (signal_pending(current)) {
ret = -EINTR;
run->exit_reason = KVM_EXIT_INTR;
}
/*
+ * If we're using a userspace irqchip, then check if we need
+ * to tell a userspace irqchip about timer or PMU level
+ * changes and if so, exit to userspace (the actual level
+ * state gets updated in kvm_timer_update_run and
+ * kvm_pmu_update_run below).
+ */
+ if (static_branch_unlikely(&userspace_irqchip_in_use)) {
+ if (kvm_timer_should_notify_user(vcpu) ||
+ kvm_pmu_should_notify_user(vcpu)) {
+ ret = -EINTR;
+ run->exit_reason = KVM_EXIT_INTR;
+ }
+ }
+
+ /*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
@@ -690,7 +726,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
kvm_pmu_sync_hwstate(vcpu);
- kvm_timer_sync_hwstate(vcpu);
+ if (static_branch_unlikely(&userspace_irqchip_in_use))
+ kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
preempt_enable();
@@ -738,7 +775,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* we don't want vtimer interrupts to race with syncing the
* timer virtual interrupt state.
*/
- kvm_timer_sync_hwstate(vcpu);
+ if (static_branch_unlikely(&userspace_irqchip_in_use))
+ kvm_timer_sync_hwstate(vcpu);
/*
* We may have taken a host interrupt in HYP mode (ie
@@ -779,6 +817,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
+out:
+ vcpu_put(vcpu);
return ret;
}
@@ -994,66 +1034,88 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
+ long r;
+
+ vcpu_load(vcpu);
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
struct kvm_vcpu_init init;
+ r = -EFAULT;
if (copy_from_user(&init, argp, sizeof(init)))
- return -EFAULT;
+ break;
- return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
+ r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
+ break;
}
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -ENOEXEC;
if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
+ break;
+ r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
- return -EFAULT;
+ break;
+
if (ioctl == KVM_SET_ONE_REG)
- return kvm_arm_set_reg(vcpu, &reg);
+ r = kvm_arm_set_reg(vcpu, &reg);
else
- return kvm_arm_get_reg(vcpu, &reg);
+ r = kvm_arm_get_reg(vcpu, &reg);
+ break;
}
case KVM_GET_REG_LIST: {
struct kvm_reg_list __user *user_list = argp;
struct kvm_reg_list reg_list;
unsigned n;
+ r = -ENOEXEC;
if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
+ break;
+ r = -EFAULT;
if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
n = reg_list.n;
reg_list.n = kvm_arm_num_regs(vcpu);
if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
+ r = -E2BIG;
if (n < reg_list.n)
- return -E2BIG;
- return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+ break;
+ r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+ break;
}
case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
if (copy_from_user(&attr, argp, sizeof(attr)))
- return -EFAULT;
- return kvm_arm_vcpu_set_attr(vcpu, &attr);
+ break;
+ r = kvm_arm_vcpu_set_attr(vcpu, &attr);
+ break;
}
case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
if (copy_from_user(&attr, argp, sizeof(attr)))
- return -EFAULT;
- return kvm_arm_vcpu_get_attr(vcpu, &attr);
+ break;
+ r = kvm_arm_vcpu_get_attr(vcpu, &attr);
+ break;
}
case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
if (copy_from_user(&attr, argp, sizeof(attr)))
- return -EFAULT;
- return kvm_arm_vcpu_has_attr(vcpu, &attr);
+ break;
+ r = kvm_arm_vcpu_has_attr(vcpu, &attr);
+ break;
}
default:
- return -EINVAL;
+ r = -EINVAL;
}
+
+ vcpu_put(vcpu);
+ return r;
}
/**
@@ -1246,6 +1308,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
cpu_hyp_reset();
return NOTIFY_OK;
+ case CPU_PM_ENTER_FAILED:
case CPU_PM_EXIT:
if (__this_cpu_read(kvm_arm_hardware_enabled))
/* The hardware was enabled before suspend. */
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index d7fd46fe9efb..4fe6e797e8b3 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -21,6 +21,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
{
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index f8eaf86b740a..ec62d1cccab7 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -924,6 +924,25 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
return 0;
}
+static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
+{
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pmdp = stage2_get_pmd(kvm, NULL, addr);
+ if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
+ return false;
+
+ if (pmd_thp_or_huge(*pmdp))
+ return kvm_s2pmd_exec(pmdp);
+
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
+ return false;
+
+ return kvm_s2pte_exec(ptep);
+}
+
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pte_t *new_pte,
unsigned long flags)
@@ -1255,10 +1274,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
- unsigned long size)
+static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
- __coherent_cache_guest_page(vcpu, pfn, size);
+ __clean_dcache_guest_page(pfn, size);
+}
+
+static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+ __invalidate_icache_guest_page(pfn, size);
}
static void kvm_send_hwpoison_signal(unsigned long address,
@@ -1284,7 +1307,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long fault_status)
{
int ret;
- bool write_fault, writable, hugetlb = false, force_pte = false;
+ bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
struct kvm *kvm = vcpu->kvm;
@@ -1296,7 +1319,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long flags = 0;
write_fault = kvm_is_write_fault(vcpu);
- if (fault_status == FSC_PERM && !write_fault) {
+ exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
+ VM_BUG_ON(write_fault && exec_fault);
+
+ if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@@ -1389,7 +1415,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
new_pmd = kvm_s2pmd_mkwrite(new_pmd);
kvm_set_pfn_dirty(pfn);
}
- coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
+
+ if (fault_status != FSC_PERM)
+ clean_dcache_guest_page(pfn, PMD_SIZE);
+
+ if (exec_fault) {
+ new_pmd = kvm_s2pmd_mkexec(new_pmd);
+ invalidate_icache_guest_page(pfn, PMD_SIZE);
+ } else if (fault_status == FSC_PERM) {
+ /* Preserve execute if XN was already cleared */
+ if (stage2_is_exec(kvm, fault_ipa))
+ new_pmd = kvm_s2pmd_mkexec(new_pmd);
+ }
+
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
} else {
pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1399,7 +1437,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
kvm_set_pfn_dirty(pfn);
mark_page_dirty(kvm, gfn);
}
- coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
+
+ if (fault_status != FSC_PERM)
+ clean_dcache_guest_page(pfn, PAGE_SIZE);
+
+ if (exec_fault) {
+ new_pte = kvm_s2pte_mkexec(new_pte);
+ invalidate_icache_guest_page(pfn, PAGE_SIZE);
+ } else if (fault_status == FSC_PERM) {
+ /* Preserve execute if XN was already cleared */
+ if (stage2_is_exec(kvm, fault_ipa))
+ new_pte = kvm_s2pte_mkexec(new_pte);
+ }
+
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8e633bd9cc1e..465095355666 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1034,10 +1034,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
device = vgic_its_alloc_device(its, device_id, itt_addr,
num_eventid_bits);
- if (IS_ERR(device))
- return PTR_ERR(device);
- return 0;
+ return PTR_ERR_OR_ZERO(device);
}
/*
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index deb51ee16a3d..83d82bd7dc4e 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -16,6 +16,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <kvm/iodev.h>
+#include <kvm/arm_arch_timer.h>
#include <kvm/arm_vgic.h>
#include "vgic.h"
@@ -122,10 +123,43 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+/*
+ * This function will return the VCPU that performed the MMIO access and
+ * trapped from within the VM, and will return NULL if this is a userspace
+ * access.
+ *
+ * We can disable preemption locally around accessing the per-CPU variable,
+ * and use the resolved vcpu pointer after enabling preemption again, because
+ * even if the current thread is migrated to another CPU, reading the per-CPU
+ * value later will give us the same value as we update the per-CPU variable
+ * in the preempt notifier handlers.
+ */
+static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+
+ preempt_disable();
+ vcpu = kvm_arm_get_running_vcpu();
+ preempt_enable();
+ return vcpu;
+}
+
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool is_uaccess)
+{
+ if (is_uaccess)
+ return;
+
+ irq->pending_latch = true;
+ vgic_irq_set_phys_active(irq, true);
+}
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ bool is_uaccess = !vgic_get_mmio_requester_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -134,17 +168,45 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = true;
-
+ if (irq->hw)
+ vgic_hw_irq_spending(vcpu, irq, is_uaccess);
+ else
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool is_uaccess)
+{
+ if (is_uaccess)
+ return;
+
+ irq->pending_latch = false;
+
+ /*
+ * We don't want the guest to effectively mask the physical
+ * interrupt by doing a write to SPENDR followed by a write to
+ * CPENDR for HW interrupts, so we clear the active state on
+ * the physical side if the virtual interrupt is not active.
+ * This may lead to taking an additional interrupt on the
+ * host, but that should not be a problem as the worst that
+ * can happen is an additional vgic injection. We also clear
+ * the pending state to maintain proper semantics for edge HW
+ * interrupts.
+ */
+ vgic_irq_set_phys_pending(irq, false);
+ if (!irq->active)
+ vgic_irq_set_phys_active(irq, false);
+}
+
void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ bool is_uaccess = !vgic_get_mmio_requester_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -154,7 +216,10 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = false;
+ if (irq->hw)
+ vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
+ else
+ irq->pending_latch = false;
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
@@ -181,27 +246,24 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
return value;
}
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool active, bool is_uaccess)
+{
+ if (is_uaccess)
+ return;
+
+ irq->active = active;
+ vgic_irq_set_phys_active(irq, active);
+}
+
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
- bool new_active_state)
+ bool active)
{
- struct kvm_vcpu *requester_vcpu;
unsigned long flags;
- spin_lock_irqsave(&irq->irq_lock, flags);
+ struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
- /*
- * The vcpu parameter here can mean multiple things depending on how
- * this function is called; when handling a trap from the kernel it
- * depends on the GIC version, and these functions are also called as
- * part of save/restore from userspace.
- *
- * Therefore, we have to figure out the requester in a reliable way.
- *
- * When accessing VGIC state from user space, the requester_vcpu is
- * NULL, which is fine, because we guarantee that no VCPUs are running
- * when accessing VGIC state from user space so irq->vcpu->cpu is
- * always -1.
- */
- requester_vcpu = kvm_arm_get_running_vcpu();
+ spin_lock_irqsave(&irq->irq_lock, flags);
/*
* If this virtual IRQ was written into a list register, we
@@ -213,14 +275,23 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
* vgic_change_active_prepare) and still has to sync back this IRQ,
* so we release and re-acquire the spin_lock to let the other thread
* sync back the IRQ.
+ *
+ * When accessing VGIC state from user space, requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
- irq->active = new_active_state;
- if (new_active_state)
+ if (irq->hw)
+ vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
+ else
+ irq->active = active;
+
+ if (irq->active)
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
else
spin_unlock_irqrestore(&irq->irq_lock, flags);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 80897102da26..c32d7b93ffd1 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -105,6 +105,26 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
irq->pending_latch = false;
}
+ /*
+ * Level-triggered mapped IRQs are special because we only
+ * observe rising edges as input to the VGIC.
+ *
+ * If the guest never acked the interrupt we have to sample
+ * the physical line and set the line level, because the
+ * device state could have changed or we simply need to
+ * process the still pending interrupt later.
+ *
+ * If this causes us to lower the level, we have to also clear
+ * the physical active state, since we will otherwise never be
+ * told when the interrupt becomes asserted again.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+
+ if (!irq->line_level)
+ vgic_irq_set_phys_active(irq, false);
+ }
+
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -162,6 +182,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
val |= GICH_LR_EOI;
}
+ /*
+ * Level-triggered mapped IRQs are special because we only observe
+ * rising edges as input to the VGIC. We therefore lower the line
+ * level here, so that we can take new virtual IRQs. See
+ * vgic_v2_fold_lr_state for more info.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT))
+ irq->line_level = false;
+
/* The GICv2 LR only holds five bits of priority. */
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index f47e8481fa45..6b329414e57a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -96,6 +96,26 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
irq->pending_latch = false;
}
+ /*
+ * Level-triggered mapped IRQs are special because we only
+ * observe rising edges as input to the VGIC.
+ *
+ * If the guest never acked the interrupt we have to sample
+ * the physical line and set the line level, because the
+ * device state could have changed or we simply need to
+ * process the still pending interrupt later.
+ *
+ * If this causes us to lower the level, we have to also clear
+ * the physical active state, since we will otherwise never be
+ * told when the interrupt becomes asserted again.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+
+ if (!irq->line_level)
+ vgic_irq_set_phys_active(irq, false);
+ }
+
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -146,6 +166,15 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
/*
+ * Level-triggered mapped IRQs are special because we only observe
+ * rising edges as input to the VGIC. We therefore lower the line
+ * level here, so that we can take new virtual IRQs. See
+ * vgic_v3_fold_lr_state for more info.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
+ irq->line_level = false;
+
+ /*
* We currently only support Group1 interrupts, which is a
* known defect. This needs to be addressed at some point.
*/
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index ecb8e25f5fe5..c7c5ef190afa 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -144,6 +144,38 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
kfree(irq);
}
+void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
+{
+ WARN_ON(irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ pending));
+}
+
+bool vgic_get_phys_line_level(struct vgic_irq *irq)
+{
+ bool line_level;
+
+ BUG_ON(!irq->hw);
+
+ if (irq->get_input_level)
+ return irq->get_input_level(irq->intid);
+
+ WARN_ON(irq_get_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ &line_level));
+ return line_level;
+}
+
+/* Set/Clear the physical active state */
+void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
+{
+
+ BUG_ON(!irq->hw);
+ WARN_ON(irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_ACTIVE,
+ active));
+}
+
/**
* kvm_vgic_target_oracle - compute the target vcpu for an irq
*
@@ -413,7 +445,8 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
- unsigned int host_irq)
+ unsigned int host_irq,
+ bool (*get_input_level)(int vindid))
{
struct irq_desc *desc;
struct irq_data *data;
@@ -433,6 +466,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
+ irq->get_input_level = get_input_level;
return 0;
}
@@ -441,10 +475,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
irq->hw = false;
irq->hwintid = 0;
+ irq->get_input_level = NULL;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid)
+ u32 vintid, bool (*get_input_level)(int vindid))
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
@@ -453,7 +488,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
BUG_ON(!irq);
spin_lock_irqsave(&irq->irq_lock, flags);
- ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
+ ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index efbcf8f96f9c..12c37b89f7a3 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -104,6 +104,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
return irq->pending_latch || irq->line_level;
}
+static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
+{
+ return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
+}
+
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@@ -140,6 +145,9 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
+bool vgic_get_phys_line_level(struct vgic_irq *irq);
+void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
+void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags);
void vgic_kick_vcpus(struct kvm *kvm);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a334399fafec..6e865e8b5b10 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -194,7 +194,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
unsigned seq;
int idx;
- if (flags & POLLIN) {
+ if (flags & EPOLLIN) {
idx = srcu_read_lock(&kvm->irq_srcu);
do {
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
@@ -208,7 +208,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
srcu_read_unlock(&kvm->irq_srcu, idx);
}
- if (flags & POLLHUP) {
+ if (flags & EPOLLHUP) {
/* The eventfd is closing, detach from KVM */
unsigned long flags;
@@ -399,12 +399,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
*/
events = f.file->f_op->poll(f.file, &irqfd->pt);
- if (events & POLLIN)
+ if (events & EPOLLIN)
schedule_work(&irqfd->inject);
/*
* do not drop the file until the irqfd is fully initialized, otherwise
- * we might race against the POLLHUP
+ * we might race against the EPOLLHUP
*/
fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 001085b611ad..4501e658e8d6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -151,17 +151,12 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
-int vcpu_load(struct kvm_vcpu *vcpu)
+void vcpu_load(struct kvm_vcpu *vcpu)
{
- int cpu;
-
- if (mutex_lock_killable(&vcpu->mutex))
- return -EINTR;
- cpu = get_cpu();
+ int cpu = get_cpu();
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
- return 0;
}
EXPORT_SYMBOL_GPL(vcpu_load);
@@ -171,7 +166,6 @@ void vcpu_put(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_put(vcpu);
preempt_notifier_unregister(&vcpu->preempt_notifier);
preempt_enable();
- mutex_unlock(&vcpu->mutex);
}
EXPORT_SYMBOL_GPL(vcpu_put);
@@ -1416,7 +1410,8 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
- bool write_fault, kvm_pfn_t *p_pfn)
+ bool write_fault, bool *writable,
+ kvm_pfn_t *p_pfn)
{
unsigned long pfn;
int r;
@@ -1442,6 +1437,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
}
+ if (writable)
+ *writable = true;
/*
* Get a reference here because callers of *hva_to_pfn* and
@@ -1507,7 +1504,7 @@ retry:
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
- r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+ r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
if (r == -EAGAIN)
goto retry;
if (r < 0)
@@ -2400,7 +2397,10 @@ static struct file_operations kvm_vcpu_fops = {
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
- return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
+ char name[8 + 1 + ITOA_MAX_LEN + 1];
+
+ snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id);
+ return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
}
static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
@@ -2532,19 +2532,16 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;
-#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
- * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
- * so vcpu_load() would break it.
+ * Some architectures have vcpu ioctls that are asynchronous to vcpu
+ * execution; mutex_lock() would break them.
*/
- if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
- return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
-#endif
-
-
- r = vcpu_load(vcpu);
- if (r)
+ r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
+ if (r != -ENOIOCTLCMD)
return r;
+
+ if (mutex_lock_killable(&vcpu->mutex))
+ return -EINTR;
switch (ioctl) {
case KVM_RUN: {
struct pid *oldpid;
@@ -2716,7 +2713,7 @@ out_free1:
r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
}
out:
- vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
kfree(fpu);
kfree(kvm_sregs);
return r;
@@ -3168,21 +3165,18 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return PTR_ERR(kvm);
#ifdef CONFIG_KVM_MMIO
r = kvm_coalesced_mmio_init(kvm);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
+ if (r < 0)
+ goto put_kvm;
#endif
r = get_unused_fd_flags(O_CLOEXEC);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
+ if (r < 0)
+ goto put_kvm;
+
file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (IS_ERR(file)) {
put_unused_fd(r);
- kvm_put_kvm(kvm);
- return PTR_ERR(file);
+ r = PTR_ERR(file);
+ goto put_kvm;
}
/*
@@ -3200,6 +3194,10 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
fd_install(r, file);
return r;
+
+put_kvm:
+ kvm_put_kvm(kvm);
+ return r;
}
static long kvm_dev_ioctl(struct file *filp,