diff options
223 files changed, 5109 insertions, 2577 deletions
diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index cb8a3a00cc92..df904aec9904 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before entering atomic context, using: int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); This function will ensure that memory for the elements indexed in the range -defined by start and end has been allocated. Thereafter, a +defined by start and nr_elements has been allocated. Thereafter, a flex_array_put() call on an element in that range is guaranteed not to block. diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 9bef4e4cec50..f64c41f8ba61 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -261,7 +261,7 @@ See KVM_GET_REGS for the data structure. 4.13 KVM_GET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (out) Returns: 0 on success, -1 on error @@ -279,6 +279,8 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. @@ -286,7 +288,7 @@ but not yet injected into the cpu core. 4.14 KVM_SET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (in) Returns: 0 on success, -1 on error @@ -1263,6 +1265,29 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +4.54 KVM_SET_TSC_KHZ + +Capability: KVM_CAP_TSC_CONTROL +Architectures: x86 +Type: vcpu ioctl +Parameters: virtual tsc_khz +Returns: 0 on success, -1 on error + +Specifies the tsc frequency for the virtual machine. The unit of the +frequency is KHz. + +4.55 KVM_GET_TSC_KHZ + +Capability: KVM_CAP_GET_TSC_KHZ +Architectures: x86 +Type: vcpu ioctl +Parameters: none +Returns: virtual tsc-khz on success, negative value on error + +Returns the tsc frequency of the guest. The unit of the return value is +KHz. If the host has unstable tsc this ioctl returns -EIO instead as an +error. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 39 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* diff --git a/arch/arm/configs/at91x40_defconfig b/arch/arm/configs/at91x40_defconfig new file mode 100644 index 000000000000..c55e9212fcbb --- /dev/null +++ b/arch/arm/configs/at91x40_defconfig @@ -0,0 +1,48 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_EMBEDDED=y +# CONFIG_HOTPLUG is not set +# CONFIG_ELF_CORE is not set +# CONFIG_FUTEX is not set +# CONFIG_TIMERFD is not set +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set +CONFIG_ARCH_AT91=y +CONFIG_ARCH_AT91X40=y +CONFIG_MACH_AT91EB01=y +CONFIG_AT91_EARLY_USART0=y +CONFIG_CPU_ARM7TDMI=y +CONFIG_SET_MEM_PARAM=y +CONFIG_DRAM_BASE=0x01000000 +CONFIG_DRAM_SIZE=0x00400000 +CONFIG_FLASH_MEM_BASE=0x01400000 +CONFIG_PROCESSOR_ID=0x14000040 +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_BINFMT_FLAT=y +# CONFIG_SUSPEND is not set +# CONFIG_FW_LOADER is not set +CONFIG_MTD=y +CONFIG_MTD_PARTITIONS=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_RAM=y +CONFIG_MTD_ROM=y +CONFIG_BLK_DEV_RAM=y +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EXT2_FS=y +# CONFIG_DNOTIFY is not set +CONFIG_ROMFS_FS=y +# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2bf27f364d09..8182f45ca493 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -767,12 +767,20 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_HAVE_HW_BREAKPOINT case PTRACE_GETHBPREGS: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; + ret = ptrace_gethbpregs(child, addr, (unsigned long __user *)data); + ptrace_put_breakpoints(child); break; case PTRACE_SETHBPREGS: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; + ret = ptrace_sethbpregs(child, addr, (unsigned long __user *)data); + ptrace_put_breakpoints(child); break; #endif diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 19390231a0e9..2d299bf5d72f 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -83,6 +83,7 @@ config ARCH_AT91CAP9 select CPU_ARM926T select GENERIC_CLOCKEVENTS select HAVE_FB_ATMEL + select HAVE_NET_MACB config ARCH_AT572D940HF bool "AT572D940HF" diff --git a/arch/arm/mach-at91/board-eb01.c b/arch/arm/mach-at91/board-eb01.c index 1f9d3cb64c50..d8df59a3426d 100644 --- a/arch/arm/mach-at91/board-eb01.c +++ b/arch/arm/mach-at91/board-eb01.c @@ -30,6 +30,11 @@ #include <mach/board.h> #include "generic.h" +static void __init at91eb01_init_irq(void) +{ + at91x40_init_interrupts(NULL); +} + static void __init at91eb01_map_io(void) { at91x40_initialize(40000000); @@ -38,7 +43,7 @@ static void __init at91eb01_map_io(void) MACHINE_START(AT91EB01, "Atmel AT91 EB01") /* Maintainer: Greg Ungerer <[email protected]> */ .timer = &at91x40_timer, - .init_irq = at91x40_init_interrupts, + .init_irq = at91eb01_init_irq, .map_io = at91eb01_map_io, MACHINE_END diff --git a/arch/arm/mach-at91/include/mach/cpu.h b/arch/arm/mach-at91/include/mach/cpu.h index 3bef931d0b1c..0700f2125305 100644 --- a/arch/arm/mach-at91/include/mach/cpu.h +++ b/arch/arm/mach-at91/include/mach/cpu.h @@ -27,6 +27,7 @@ #define ARCH_ID_AT91SAM9G45 0x819b05a0 #define ARCH_ID_AT91SAM9G45MRL 0x819b05a2 /* aka 9G45-ES2 & non ES lots */ #define ARCH_ID_AT91SAM9G45ES 0x819b05a1 /* 9G45-ES (Engineering Sample) */ +#define ARCH_ID_AT91SAM9X5 0x819a05a0 #define ARCH_ID_AT91CAP9 0x039A03A0 #define ARCH_ID_AT91SAM9XE128 0x329973a0 @@ -55,6 +56,12 @@ static inline unsigned long at91_cpu_fully_identify(void) #define ARCH_EXID_AT91SAM9G46 0x00000003 #define ARCH_EXID_AT91SAM9G45 0x00000004 +#define ARCH_EXID_AT91SAM9G15 0x00000000 +#define ARCH_EXID_AT91SAM9G35 0x00000001 +#define ARCH_EXID_AT91SAM9X35 0x00000002 +#define ARCH_EXID_AT91SAM9G25 0x00000003 +#define ARCH_EXID_AT91SAM9X25 0x00000004 + static inline unsigned long at91_exid_identify(void) { return at91_sys_read(AT91_DBGU_EXID); @@ -143,6 +150,27 @@ static inline unsigned long at91cap9_rev_identify(void) #define cpu_is_at91sam9m11() (0) #endif +#ifdef CONFIG_ARCH_AT91SAM9X5 +#define cpu_is_at91sam9x5() (at91_cpu_identify() == ARCH_ID_AT91SAM9X5) +#define cpu_is_at91sam9g15() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9G15)) +#define cpu_is_at91sam9g35() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9G35)) +#define cpu_is_at91sam9x35() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9X35)) +#define cpu_is_at91sam9g25() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9G25)) +#define cpu_is_at91sam9x25() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9X25)) +#else +#define cpu_is_at91sam9x5() (0) +#define cpu_is_at91sam9g15() (0) +#define cpu_is_at91sam9g35() (0) +#define cpu_is_at91sam9x35() (0) +#define cpu_is_at91sam9g25() (0) +#define cpu_is_at91sam9x25() (0) +#endif + #ifdef CONFIG_ARCH_AT91CAP9 #define cpu_is_at91cap9() (at91_cpu_identify() == ARCH_ID_AT91CAP9) #define cpu_is_at91cap9_revB() (at91cap9_rev_identify() == ARCH_REVISION_CAP9_B) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h index f6c5617e16af..b214b5b0432d 100644 --- a/arch/ia64/kvm/vti.h +++ b/arch/ia64/kvm/vti.h @@ -83,13 +83,13 @@ union vac { unsigned long value; struct { - int a_int:1; - int a_from_int_cr:1; - int a_to_int_cr:1; - int a_from_psr:1; - int a_from_cpuid:1; - int a_cover:1; - int a_bsw:1; + unsigned int a_int:1; + unsigned int a_from_int_cr:1; + unsigned int a_to_int_cr:1; + unsigned int a_from_psr:1; + unsigned int a_from_cpuid:1; + unsigned int a_cover:1; + unsigned int a_bsw:1; long reserved:57; }; }; @@ -97,12 +97,12 @@ union vac { union vdc { unsigned long value; struct { - int d_vmsw:1; - int d_extint:1; - int d_ibr_dbr:1; - int d_pmc:1; - int d_to_pmd:1; - int d_itm:1; + unsigned int d_vmsw:1; + unsigned int d_extint:1; + unsigned int d_ibr_dbr:1; + unsigned int d_pmc:1; + unsigned int d_to_pmd:1; + unsigned int d_itm:1; long reserved:58; }; }; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8e256cc5dcd9..351c80fbba7e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -997,9 +997,6 @@ config IRQ_GT641XX config IRQ_GIC bool -config IRQ_CPU_OCTEON - bool - config MIPS_BOARDS_GEN bool @@ -1359,8 +1356,6 @@ config CPU_SB1 config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON - select IRQ_CPU - select IRQ_CPU_OCTEON select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_SMP diff --git a/arch/mips/alchemy/devboards/db1x00/board_setup.c b/arch/mips/alchemy/devboards/db1x00/board_setup.c index 05f120ff90f9..5c956fe8760f 100644 --- a/arch/mips/alchemy/devboards/db1x00/board_setup.c +++ b/arch/mips/alchemy/devboards/db1x00/board_setup.c @@ -127,13 +127,10 @@ const char *get_system_type(void) void __init board_setup(void) { unsigned long bcsr1, bcsr2; - u32 pin_func; bcsr1 = DB1000_BCSR_PHYS_ADDR; bcsr2 = DB1000_BCSR_PHYS_ADDR + DB1000_BCSR_HEXLED_OFS; - pin_func = 0; - #ifdef CONFIG_MIPS_DB1000 printk(KERN_INFO "AMD Alchemy Au1000/Db1000 Board\n"); #endif @@ -164,12 +161,16 @@ void __init board_setup(void) /* Not valid for Au1550 */ #if defined(CONFIG_IRDA) && \ (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100)) - /* Set IRFIRSEL instead of GPIO15 */ - pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF; - au_writel(pin_func, SYS_PINFUNC); - /* Power off until the driver is in use */ - bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK, - BCSR_RESETS_IRDA_MODE_OFF); + { + u32 pin_func; + + /* Set IRFIRSEL instead of GPIO15 */ + pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF; + au_writel(pin_func, SYS_PINFUNC); + /* Power off until the driver is in use */ + bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK, + BCSR_RESETS_IRDA_MODE_OFF); + } #endif bcsr_write(BCSR_PCMCIA, 0); /* turn off PCMCIA power */ @@ -177,31 +178,35 @@ void __init board_setup(void) alchemy_gpio1_input_enable(); #ifdef CONFIG_MIPS_MIRAGE - /* GPIO[20] is output */ - alchemy_gpio_direction_output(20, 0); + { + u32 pin_func; - /* Set GPIO[210:208] instead of SSI_0 */ - pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0; + /* GPIO[20] is output */ + alchemy_gpio_direction_output(20, 0); - /* Set GPIO[215:211] for LEDs */ - pin_func |= 5 << 2; + /* Set GPIO[210:208] instead of SSI_0 */ + pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0; - /* Set GPIO[214:213] for more LEDs */ - pin_func |= 5 << 12; + /* Set GPIO[215:211] for LEDs */ + pin_func |= 5 << 2; - /* Set GPIO[207:200] instead of PCMCIA/LCD */ - pin_func |= SYS_PF_LCD | SYS_PF_PC; - au_writel(pin_func, SYS_PINFUNC); + /* Set GPIO[214:213] for more LEDs */ + pin_func |= 5 << 12; - /* - * Enable speaker amplifier. This should - * be part of the audio driver. - */ - alchemy_gpio_direction_output(209, 1); + /* Set GPIO[207:200] instead of PCMCIA/LCD */ + pin_func |= SYS_PF_LCD | SYS_PF_PC; + au_writel(pin_func, SYS_PINFUNC); - pm_power_off = mirage_power_off; - _machine_halt = mirage_power_off; - _machine_restart = (void(*)(char *))mips_softreset; + /* + * Enable speaker amplifier. This should + * be part of the audio driver. + */ + alchemy_gpio_direction_output(209, 1); + + pm_power_off = mirage_power_off; + _machine_halt = mirage_power_off; + _machine_restart = (void(*)(char *))mips_softreset; + } #endif #ifdef CONFIG_MIPS_BOSPORUS diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c index 15125c2fda7d..34a90a4bb6f4 100644 --- a/arch/mips/alchemy/xxs1500/init.c +++ b/arch/mips/alchemy/xxs1500/init.c @@ -51,10 +51,9 @@ void __init prom_init(void) prom_init_cmdline(); memsize_str = prom_getenv("memsize"); - if (!memsize_str) + if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize)) memsize = 0x04000000; - else - strict_strtoul(memsize_str, 0, &memsize); + add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 88c9d963be88..9a6243676e22 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -16,8 +16,8 @@ int main(int argc, char *argv[]) { + unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; struct stat sb; - uint64_t vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; if (argc != 3) { fprintf(stderr, "Usage: %s <pathname> <vmlinux_load_addr>\n", diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index caae22858163..cad555ebeca3 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -1,11 +1,7 @@ -config CAVIUM_OCTEON_SPECIFIC_OPTIONS - bool "Enable Octeon specific options" - depends on CPU_CAVIUM_OCTEON - default "y" +if CPU_CAVIUM_OCTEON config CAVIUM_CN63XXP1 bool "Enable CN63XXP1 errata worarounds" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "n" help The CN63XXP1 chip requires build time workarounds to @@ -16,7 +12,6 @@ config CAVIUM_CN63XXP1 config CAVIUM_OCTEON_2ND_KERNEL bool "Build the kernel to be used as a 2nd kernel on the same chip" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "n" help This option configures this kernel to be linked at a different @@ -26,7 +21,6 @@ config CAVIUM_OCTEON_2ND_KERNEL config CAVIUM_OCTEON_HW_FIX_UNALIGNED bool "Enable hardware fixups of unaligned loads and stores" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "y" help Configure the Octeon hardware to automatically fix unaligned loads @@ -38,7 +32,6 @@ config CAVIUM_OCTEON_HW_FIX_UNALIGNED config CAVIUM_OCTEON_CVMSEG_SIZE int "Number of L1 cache lines reserved for CVMSEG memory" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS range 0 54 default 1 help @@ -50,7 +43,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE config CAVIUM_OCTEON_LOCK_L2 bool "Lock often used kernel code in the L2" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "y" help Enable locking parts of the kernel into the L2 cache. @@ -93,7 +85,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_STATIC - depends on CPU_CAVIUM_OCTEON config CAVIUM_OCTEON_HELPER def_bool y @@ -107,6 +98,8 @@ config NEED_SG_DMA_LENGTH config SWIOTLB def_bool y - depends on CPU_CAVIUM_OCTEON select IOMMU_HELPER select NEED_SG_DMA_LENGTH + + +endif # CPU_CAVIUM_OCTEON diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h index 650ac9ba734c..b4db69fbc40c 100644 --- a/arch/mips/include/asm/cache.h +++ b/arch/mips/include/asm/cache.h @@ -17,6 +17,6 @@ #define SMP_CACHE_SHIFT L1_CACHE_SHIFT #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) #endif /* _ASM_CACHE_H */ diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h index fa4328f9124f..65f9bdd02f1f 100644 --- a/arch/mips/include/asm/cevt-r4k.h +++ b/arch/mips/include/asm/cevt-r4k.h @@ -14,6 +14,9 @@ #ifndef __ASM_CEVT_R4K_H #define __ASM_CEVT_R4K_H +#include <linux/clockchips.h> +#include <asm/time.h> + DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); void mips_event_handler(struct clock_event_device *dev); diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index f5e856015329..c565b7c3f0b5 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -70,6 +70,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + flush_tlb_mm(vma->vm_mm); } static inline int huge_pte_none(pte_t pte) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h index 32978d32561a..ed72e6a26b73 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h @@ -88,7 +88,7 @@ struct bcm_tag { char kernel_crc[CRC_LEN]; /* 228-235: Unused at present */ char reserved1[8]; - /* 236-239: CRC32 of header excluding tagVersion */ + /* 236-239: CRC32 of header excluding last 20 bytes */ char header_crc[CRC_LEN]; /* 240-255: Unused at present */ char reserved2[16]; diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 9ce9f64cb76f..2d8e447cb828 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -211,7 +211,7 @@ EXPORT_SYMBOL(vdma_free); */ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) { - int first, pages, npages; + int first, pages; if (laddr > 0xffffff) { if (vdma_debug) @@ -228,8 +228,7 @@ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) return -EINVAL; /* invalid physical address */ } - npages = pages = - (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; + pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; first = laddr >> 12; if (vdma_debug) printk("vdma_remap: first=%x, pages=%x\n", first, pages); diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c index 5ebe75a68350..d7feb898692c 100644 --- a/arch/mips/jz4740/dma.c +++ b/arch/mips/jz4740/dma.c @@ -242,9 +242,7 @@ EXPORT_SYMBOL_GPL(jz4740_dma_get_residue); static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma) { - uint32_t status; - - status = jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); + (void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE); diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c index fe01678d94fd..eaa853a54af6 100644 --- a/arch/mips/jz4740/time.c +++ b/arch/mips/jz4740/time.c @@ -89,7 +89,7 @@ static int jz4740_clockevent_set_next(unsigned long evt, static struct clock_event_device jz4740_clockevent = { .name = "jz4740-timer", - .features = CLOCK_EVT_FEAT_PERIODIC, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_next_event = jz4740_clockevent_set_next, .set_mode = jz4740_clockevent_set_mode, .rating = 200, diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c index b2c015129055..654d5c3900b6 100644 --- a/arch/mips/jz4740/timer.c +++ b/arch/mips/jz4740/timer.c @@ -27,11 +27,13 @@ void jz4740_timer_enable_watchdog(void) { writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); } +EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog); void jz4740_timer_disable_watchdog(void) { writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); } +EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog); void __init jz4740_timer_init(void) { diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 94ca2b018af7..feb8021a305f 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -23,6 +23,7 @@ #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ #define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ +#define JUMP_RANGE_MASK ((1UL << 28) - 1) #define INSN_NOP 0x00000000 /* nop */ #define INSN_JAL(addr) \ @@ -44,12 +45,12 @@ static inline void ftrace_dyn_arch_init_insns(void) /* jal (ftrace_caller + 8), jump over the first two instruction */ buf = (u32 *)&insn_jal_ftrace_caller; - uasm_i_jal(&buf, (FTRACE_ADDR + 8)); + uasm_i_jal(&buf, (FTRACE_ADDR + 8) & JUMP_RANGE_MASK); #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* j ftrace_graph_caller */ buf = (u32 *)&insn_j_ftrace_graph_caller; - uasm_i_j(&buf, (unsigned long)ftrace_graph_caller); + uasm_i_j(&buf, (unsigned long)ftrace_graph_caller & JUMP_RANGE_MASK); #endif } diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d21c388c0116..584e6b55c865 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -540,8 +540,8 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) secure_computing(regs->regs[2]); if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), - regs->regs[2]); + audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]), + -regs->regs[2]); if (!(current->ptrace & PT_PTRACED)) goto out; diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7f5468b38d4c..7f1377eb22d3 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -565,7 +565,7 @@ einval: li v0, -ENOSYS sys sys_ioprio_get 2 /* 4315 */ sys sys_utimensat 4 sys sys_signalfd 3 - sys sys_ni_syscall 0 + sys sys_ni_syscall 0 /* was timerfd */ sys sys_eventfd 1 sys sys_fallocate 6 /* 4320 */ sys sys_timerfd_create 2 diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index a2e1fcbc41dc..7c0ef7f128bf 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -404,7 +404,7 @@ sys_call_table: PTR sys_ioprio_get PTR sys_utimensat /* 5275 */ PTR sys_signalfd - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys_fallocate PTR sys_timerfd_create /* 5280 */ diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index b2c7624995b8..de6c5563beab 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -403,7 +403,7 @@ EXPORT(sysn32_call_table) PTR sys_ioprio_get PTR compat_sys_utimensat PTR compat_sys_signalfd /* 6280 */ - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys_fallocate PTR sys_timerfd_create diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 049a9c8c49a0..b0541dda8830 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -522,7 +522,7 @@ sys_call_table: PTR sys_ioprio_get /* 4315 */ PTR compat_sys_utimensat PTR compat_sys_signalfd - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys32_fallocate /* 4320 */ PTR sys_timerfd_create diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 832afbb87588..e4b0b0bec039 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -74,6 +74,7 @@ SECTIONS INIT_TASK_DATA(PAGE_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) + READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) DATA_DATA CONSTRUCTORS } diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c index 11b193f848f8..d93830ad6113 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson/common/env.c @@ -29,9 +29,10 @@ unsigned long memsize, highmemsize; #define parse_even_earlier(res, option, p) \ do { \ - int ret; \ + unsigned int tmp __maybe_unused; \ + \ if (strncmp(option, (char *)p, strlen(option)) == 0) \ - ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \ + tmp = strict_strtol((char *)p + strlen(option"="), 10, &res); \ } while (0) void __init prom_init_env(void) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index b4923a75cb4b..71bddf8f7d25 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1075,7 +1075,6 @@ static int __cpuinit probe_scache(void) unsigned long flags, addr, begin, end, pow2; unsigned int config = read_c0_config(); struct cpuinfo_mips *c = ¤t_cpu_data; - int tmp; if (config & CONF_SC) return 0; @@ -1108,7 +1107,6 @@ static int __cpuinit probe_scache(void) /* Now search for the wrap around point. */ pow2 = (128 * 1024); - tmp = 0; for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) { cache_op(Index_Load_Tag_SD, addr); __asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 5ef294fbb6e7..f5734c2c8097 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1151,8 +1151,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) struct uasm_reloc *r = relocs; u32 *f; unsigned int final_len; - struct mips_huge_tlb_info htlb_info; - enum vmalloc64_mode vmalloc_mode; + struct mips_huge_tlb_info htlb_info __maybe_unused; + enum vmalloc64_mode vmalloc_mode __maybe_unused; memset(tlb_handler, 0, sizeof(tlb_handler)); memset(labels, 0, sizeof(labels)); diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index 414f0c99b196..31180c321a1a 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -193,8 +193,6 @@ extern struct plat_smp_ops msmtc_smp_ops; void __init prom_init(void) { - int result; - prom_argc = fw_arg0; _prom_argv = (int *) fw_arg1; _prom_envp = (int *) fw_arg2; @@ -360,20 +358,14 @@ void __init prom_init(void) #ifdef CONFIG_SERIAL_8250_CONSOLE console_config(); #endif - /* Early detection of CMP support */ - result = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ); - #ifdef CONFIG_MIPS_CMP - if (result) + /* Early detection of CMP support */ + if (gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ)) register_smp_ops(&cmp_smp_ops); + else #endif #ifdef CONFIG_MIPS_MT_SMP -#ifdef CONFIG_MIPS_CMP - if (!result) register_smp_ops(&vsmp_smp_ops); -#else - register_smp_ops(&vsmp_smp_ops); -#endif #endif #ifdef CONFIG_MIPS_MT_SMTC register_smp_ops(&msmtc_smp_ops); diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 9027061f0ead..e85c977328da 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -56,7 +56,6 @@ static DEFINE_RAW_SPINLOCK(mips_irq_lock); static inline int mips_pcibios_iack(void) { int irq; - u32 dummy; /* * Determine highest priority pending interrupt by performing @@ -83,7 +82,7 @@ static inline int mips_pcibios_iack(void) BONITO_PCIMAP_CFG = 0x20000; /* Flush Bonito register block */ - dummy = BONITO_PCIMAP_CFG; + (void) BONITO_PCIMAP_CFG; iob(); /* sync */ irq = __raw_readl((u32 *)_pcictrl_bonito_pcicfg); diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c index f9b9dcdfa9dd..98fd0099d964 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c @@ -97,7 +97,7 @@ static int msp_per_irq_set_affinity(struct irq_data *d, static struct irq_chip msp_per_irq_controller = { .name = "MSP_PER", - .irq_enable = unmask_per_irq. + .irq_enable = unmask_per_irq, .irq_disable = mask_per_irq, .irq_ack = msp_per_irq_ack, #ifdef CONFIG_SMP diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S index dbb5c7b4b70f..f8a751c03282 100644 --- a/arch/mips/power/hibernate.S +++ b/arch/mips/power/hibernate.S @@ -35,7 +35,7 @@ LEAF(swsusp_arch_resume) 0: PTR_L t1, PBE_ADDRESS(t0) /* source */ PTR_L t2, PBE_ORIG_ADDRESS(t0) /* destination */ - PTR_ADDIU t3, t1, PAGE_SIZE + PTR_ADDU t3, t1, PAGE_SIZE 1: REG_L t8, (t1) REG_S t8, (t2) diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c index deddbf0ebe5c..698904daf901 100644 --- a/arch/mips/sgi-ip22/ip22-platform.c +++ b/arch/mips/sgi-ip22/ip22-platform.c @@ -132,7 +132,7 @@ static struct platform_device eth1_device = { */ static int __init sgiseeq_devinit(void) { - unsigned int tmp; + unsigned int pbdma __maybe_unused; int res, i; eth0_pd.hpc = hpc3c0; @@ -151,7 +151,7 @@ static int __init sgiseeq_devinit(void) /* Second HPC is missing? */ if (ip22_is_fullhouse() || - get_dbe(tmp, (unsigned int *)&hpc3c1->pbdma[1])) + get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1])) return 0; sgimc->giopar |= SGIMC_GIOPAR_MASTEREXP1 | SGIMC_GIOPAR_EXP164 | diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 603fc91c1030..1a94c9894188 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -32,7 +32,7 @@ static unsigned long dosample(void) { u32 ct0, ct1; - u8 msb, lsb; + u8 msb; /* Start the counter. */ sgint->tcword = (SGINT_TCWORD_CNT2 | SGINT_TCWORD_CALL | @@ -46,7 +46,7 @@ static unsigned long dosample(void) /* Latch and spin until top byte of counter2 is zero */ do { writeb(SGINT_TCWORD_CNT2 | SGINT_TCWORD_CLAT, &sgint->tcword); - lsb = readb(&sgint->tcnt2); + (void) readb(&sgint->tcnt2); msb = readb(&sgint->tcnt2); ct1 = read_c0_count(); } while (msb); diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c index a1fa4abb3f6a..cd0d5b06cd83 100644 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ b/arch/mips/sgi-ip27/ip27-hubio.c @@ -29,7 +29,6 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size) { nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); - volatile hubreg_t junk; unsigned i; /* use small-window mapping if possible */ @@ -64,7 +63,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, * after we write it. */ IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr); - junk = HUB_L(IIO_ITTE_GET(nasid, i)); + (void) HUB_L(IIO_ITTE_GET(nasid, i)); return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE); } diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index c3d30a88daf3..1d1919a44e88 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -54,11 +54,8 @@ void __init setup_replication_mask(void) static __init void set_ktext_source(nasid_t client_nasid, nasid_t server_nasid) { - cnodeid_t client_cnode; kern_vars_t *kvp; - client_cnode = NASID_TO_COMPACT_NODEID(client_nasid); - kvp = &hub_data(client_nasid)->kern_vars; KERN_VARS_ADDR(client_nasid) = (unsigned long)kvp; diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index c76151b56568..0904d4d30cb3 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -95,7 +95,7 @@ static void __init sni_a20r_timer_setup(void) static __init unsigned long dosample(void) { u32 ct0, ct1; - volatile u8 msb, lsb; + volatile u8 msb; /* Start the counter. */ outb_p(0x34, 0x43); @@ -108,7 +108,7 @@ static __init unsigned long dosample(void) /* Latch and spin until top byte of counter0 is zero */ do { outb(0x00, 0x43); - lsb = inb(0x40); + (void) inb(0x40); msb = inb(0x40); ct1 = read_c0_count(); } while (msb); diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad77..d2ca5ed3877b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. + */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a92..a0e57618ff33 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7fea26fffb25..7a2a565f88c4 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 { u32 host_pid[E500_PID_NUM]; u32 pid[E500_PID_NUM]; + u32 svr; u32 mas0; u32 mas1; @@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b72a39..186f150b9b89 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -223,6 +223,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong sprg4; ulong sprg5; @@ -232,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; @@ -255,6 +259,7 @@ struct kvm_vcpu_arch { u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING + struct mutex exit_timing_lock; struct kvmppc_exit_timing timing_exit; struct kvmppc_exit_timing timing_last_enter; u32 last_exit_type; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c344..9345238edecf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23e6a93145ab..cf0d82278951 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -395,6 +395,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 55613e33e263..a6ae1cfad86c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -933,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if (data && !(data & DABR_TRANSLATION)) return -EIO; #ifdef CONFIG_HAVE_HW_BREAKPOINT + if (ptrace_get_breakpoints(task) < 0) + return -ESRCH; + bp = thread->ptrace_bps[0]; if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } + ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -948,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, (DABR_DATA_WRITE | DABR_DATA_READ), &attr.bp_type); ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) + if (ret) { + ptrace_put_breakpoints(task); return ret; + } thread->ptrace_bps[0] = bp; + ptrace_put_breakpoints(task); thread->dabr = data; return 0; } @@ -965,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; + ptrace_put_breakpoints(task); return PTR_ERR(bp); } + ptrace_put_breakpoints(task); + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ /* Move contents to the DABR register */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e7421143..da3a1225c0ac 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 65ea083a5b27..549bb2c9a47a 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); return emulated; } @@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); return emulated; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c3..8462b3a1c1c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. + */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 1cc471faac2d..b58ccae95904 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -380,7 +380,6 @@ lightweight_exit: * because host interrupt handlers would get confused. */ lwz r1, VCPU_GPR(r1)(r4) - /* XXX handle USPRG0 */ /* Host interrupt handlers may have clobbered these guest-readable * SPRGs, so we need to reload them here with the guest's values. */ lwz r3, VCPU_SPRG4(r4) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e3768ee9b595..318dbc61ba44 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Registers init */ vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ vcpu->vcpu_id = 0; @@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8e3edfbc9634..69cd665a0caf 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, <[email protected]> * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; @@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; case SPRN_HID1: kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + case SPRN_SVR: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; case SPRN_MMUCSR0: kvmppc_set_gpr(vcpu, rt, 0); break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d6d6d47a75a9..b18fe353397d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, [email protected] * @@ -24,6 +24,7 @@ #include "../mm/mmu_decl.h" #include "e500_tlb.h" #include "trace.h" +#include "timing.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) vcpu_e500->mas7 = 0; } + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); return EMULATE_DONE; } @@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) write_host_tlbe(vcpu_e500, stlbsel, sesel); } + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; } @@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c64fd2909bb2..141dce3c6810 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: @@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; } + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; case OP_31_XOP_STHX: @@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) printk("mtspr: unknown spr %x\n", sprn); break; } + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); break; case OP_31_XOP_DCBI: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 99758460efde..616dd516ca1f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: @@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + return 0; } @@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { +#ifdef CONFIG_BOOKE + /* + * vrsave (formerly usprg0) isn't used by Linux, but may + * be used by the guest. + * + * On non-booke this is associated with Altivec and + * is handled by code in book3s.c. + */ + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_core_vcpu_put(vcpu); +#ifdef CONFIG_BOOKE + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index a021f5827a33..319177df9587 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) { int i; - /* pause guest execution to avoid concurrent updates */ - mutex_lock(&vcpu->mutex); + /* Take a lock to avoid concurrent updates */ + mutex_lock(&vcpu->arch.exit_timing_lock); vcpu->arch.last_exit_type = 0xDEAD; for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { @@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) vcpu->arch.timing_exit.tv64 = 0; vcpu->arch.timing_last_enter.tv64 = 0; - mutex_unlock(&vcpu->mutex); + mutex_unlock(&vcpu->arch.exit_timing_lock); } static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) @@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) return; } + mutex_lock(&vcpu->arch.exit_timing_lock); + vcpu->arch.timing_count_type[type]++; /* sum */ @@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) vcpu->arch.timing_min_duration[type] = duration; if (unlikely(duration > vcpu->arch.timing_max_duration[type])) vcpu->arch.timing_max_duration[type] = duration; + + mutex_unlock(&vcpu->arch.exit_timing_lock); } void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) @@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) { struct kvm_vcpu *vcpu = m->private; int i; + u64 min, max, sum, sum_quad; seq_printf(m, "%s", "type count min max sum sum_squared\n"); + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + + min = vcpu->arch.timing_min_duration[i]; + do_div(min, tb_ticks_per_usec); + max = vcpu->arch.timing_max_duration[i]; + do_div(max, tb_ticks_per_usec); + sum = vcpu->arch.timing_sum_duration[i]; + do_div(sum, tb_ticks_per_usec); + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; + do_div(sum_quad, tb_ticks_per_usec); + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", kvm_exit_names[i], vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); + min, + max, + sum, + sum_quad); + } return 0; } diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 2130ca674e9b..3d7b209b2178 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -117,7 +117,11 @@ void user_enable_single_step(struct task_struct *child) set_tsk_thread_flag(child, TIF_SINGLESTEP); + if (ptrace_get_breakpoints(child) < 0) + return; + set_single_step(child, pc); + ptrace_put_breakpoints(child); } void user_disable_single_step(struct task_struct *child) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0f5213564326..0049211959c0 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -14,6 +14,8 @@ #include <asm/desc_defs.h> struct x86_emulate_ctxt; +enum x86_intercept; +enum x86_intercept_stage; struct x86_exception { u8 vector; @@ -24,6 +26,24 @@ struct x86_exception { }; /* + * This struct is used to carry enough information from the instruction + * decoder to main KVM so that a decision can be made whether the + * instruction needs to be intercepted or not. + */ +struct x86_instruction_info { + u8 intercept; /* which intercept */ + u8 rep_prefix; /* rep prefix? */ + u8 modrm_mod; /* mod part of modrm */ + u8 modrm_reg; /* index of register used */ + u8 modrm_rm; /* rm part of modrm */ + u64 src_val; /* value of source operand */ + u8 src_bytes; /* size of source operand */ + u8 dst_bytes; /* size of destination operand */ + u8 ad_bytes; /* size of src/dst address */ + u64 next_rip; /* rip following the instruction */ +}; + +/* * x86_emulate_ops: * * These operations represent the instruction emulator's interface to memory. @@ -62,6 +82,7 @@ struct x86_exception { #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ +#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ struct x86_emulate_ops { /* @@ -71,8 +92,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, struct x86_exception *fault); /* @@ -82,8 +104,8 @@ struct x86_emulate_ops { * @val: [OUT] Value write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. @@ -92,8 +114,8 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*fetch)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* @@ -102,11 +124,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_emulated)(unsigned long addr, - void *val, - unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); /* * write_emulated: Write bytes to emulated/special memory area. @@ -115,11 +135,10 @@ struct x86_emulate_ops { * required). * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_emulated)(unsigned long addr, - const void *val, + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -129,40 +148,54 @@ struct x86_emulate_ops { * @new: [IN ] Value to write to @addr. * @bytes: [IN ] Number of bytes to access using CMPXCHG. */ - int (*cmpxchg_emulated)(unsigned long addr, + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); - - int (*pio_in_emulated)(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - int (*pio_out_emulated)(int size, unsigned short port, const void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu); - void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu); - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); - unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); - int (*cpl)(struct kvm_vcpu *vcpu); - int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); - int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); - int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + struct x86_exception *fault); + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); + + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count); + + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, const void *val, + unsigned int count); + + bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, int seg); + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, int seg); + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, + int seg); + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + int (*cpl)(struct x86_emulate_ctxt *ctxt); + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + void (*halt)(struct x86_emulate_ctxt *ctxt); + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ + int (*intercept)(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); }; +typedef u32 __attribute__((vector_size(16))) sse128_t; + /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; unsigned int bytes; union { unsigned long orig_val; @@ -174,11 +207,13 @@ struct operand { ulong ea; unsigned seg; } mem; + unsigned xmm; } addr; union { unsigned long val; u64 val64; char valptr[sizeof(unsigned long) + 2]; + sse128_t vec_val; }; }; @@ -197,6 +232,7 @@ struct read_cache { struct decode_cache { u8 twobyte; u8 b; + u8 intercept; u8 lock_prefix; u8 rep_prefix; u8 op_bytes; @@ -209,6 +245,7 @@ struct decode_cache { u8 seg_override; unsigned int d; int (*execute)(struct x86_emulate_ctxt *ctxt); + int (*check_perm)(struct x86_emulate_ctxt *ctxt); unsigned long regs[NR_VCPU_REGS]; unsigned long eip; /* modrm */ @@ -227,17 +264,15 @@ struct x86_emulate_ctxt { struct x86_emulate_ops *ops; /* Register state before/after emulation. */ - struct kvm_vcpu *vcpu; - unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool guest_mode; /* guest running a nested guest */ bool perm_ok; /* do not check permissions if true */ bool only_vendor_specific_insn; @@ -249,8 +284,8 @@ struct x86_emulate_ctxt { }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 0xf3 +#define REPNE_PREFIX 0xf2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -259,6 +294,69 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ +/* any protected mode */ +#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ + X86EMUL_MODE_PROT64) + +enum x86_intercept_stage { + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ + X86_ICPT_PRE_EXCEPT, + X86_ICPT_POST_EXCEPT, + X86_ICPT_POST_MEMACCESS, +}; + +enum x86_intercept { + x86_intercept_none, + x86_intercept_cr_read, + x86_intercept_cr_write, + x86_intercept_clts, + x86_intercept_lmsw, + x86_intercept_smsw, + x86_intercept_dr_read, + x86_intercept_dr_write, + x86_intercept_lidt, + x86_intercept_sidt, + x86_intercept_lgdt, + x86_intercept_sgdt, + x86_intercept_lldt, + x86_intercept_sldt, + x86_intercept_ltr, + x86_intercept_str, + x86_intercept_rdtsc, + x86_intercept_rdpmc, + x86_intercept_pushf, + x86_intercept_popf, + x86_intercept_cpuid, + x86_intercept_rsm, + x86_intercept_iret, + x86_intercept_intn, + x86_intercept_invd, + x86_intercept_pause, + x86_intercept_hlt, + x86_intercept_invlpg, + x86_intercept_invlpga, + x86_intercept_vmrun, + x86_intercept_vmload, + x86_intercept_vmsave, + x86_intercept_vmmcall, + x86_intercept_stgi, + x86_intercept_clgi, + x86_intercept_skinit, + x86_intercept_rdtscp, + x86_intercept_icebp, + x86_intercept_wbinvd, + x86_intercept_monitor, + x86_intercept_mwait, + x86_intercept_rdmsr, + x86_intercept_wrmsr, + x86_intercept_in, + x86_intercept_ins, + x86_intercept_out, + x86_intercept_outs, + + nr_x86_intercepts +}; + /* Host execution mode. */ #if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 @@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 +#define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af0991fdf0..d2ac8e2ee897 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,14 +30,30 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 0xFFFFFF0000000000ULL) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) @@ -118,6 +134,9 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, + VCPU_EXREG_RFLAGS, + VCPU_EXREG_CPL, + VCPU_EXREG_SEGMENTS, }; enum { @@ -256,7 +275,7 @@ struct kvm_mmu { struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq); + u64 *spte, const void *pte); hpa_t root_hpa; int root_level; int shadow_root_level; @@ -340,7 +359,6 @@ struct kvm_vcpu_arch { struct fpu guest_fpu; u64 xcr0; - gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; @@ -367,18 +385,22 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; + u32 virtual_tsc_khz; bool tsc_catchup; + u32 tsc_catchup_mult; + s8 tsc_catchup_shift; bool nmi_pending; bool nmi_injected; @@ -448,9 +470,6 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_offset; u64 last_tsc_write; - u32 virtual_tsc_khz; - u32 virtual_tsc_mult; - s8 virtual_tsc_shift; struct kvm_xen_hvm_config xen_hvm_config; @@ -502,6 +521,8 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; +struct x86_instruction_info; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -586,9 +607,17 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); + const struct trace_print_flags *exit_reasons_str; }; @@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ @@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); - void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); @@ -657,8 +690,6 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); -int emulate_clts(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cce71413d0b..485b4f1f079b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -118,6 +118,7 @@ complete list. */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index e61539b07d2c..447a28de6f09 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids }, }, [ C(LL ) ] = { - /* - * TBD: Need Off-core Response Performance Monitoring support - */ [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, /* * Use RFO, not WRITEBACK, because a write miss would typically occur * on RFO. */ [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01bb, - /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids }; /* - * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 + * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; + * See IA32 SDM Vol 3B 30.6.1.3 */ -#define DMND_DATA_RD (1 << 0) -#define DMND_RFO (1 << 1) -#define DMND_WB (1 << 3) -#define PF_DATA_RD (1 << 4) -#define PF_DATA_RFO (1 << 5) -#define RESP_UNCORE_HIT (1 << 8) -#define RESP_MISS (0xf600) /* non uncore hit */ +#define NHM_DMND_DATA_RD (1 << 0) +#define NHM_DMND_RFO (1 << 1) +#define NHM_DMND_IFETCH (1 << 2) +#define NHM_DMND_WB (1 << 3) +#define NHM_PF_DATA_RD (1 << 4) +#define NHM_PF_DATA_RFO (1 << 5) +#define NHM_PF_IFETCH (1 << 6) +#define NHM_OFFCORE_OTHER (1 << 7) +#define NHM_UNCORE_HIT (1 << 8) +#define NHM_OTHER_CORE_HIT_SNP (1 << 9) +#define NHM_OTHER_CORE_HITM (1 << 10) + /* reserved */ +#define NHM_REMOTE_CACHE_FWD (1 << 12) +#define NHM_REMOTE_DRAM (1 << 13) +#define NHM_LOCAL_DRAM (1 << 14) +#define NHM_NON_DRAM (1 << 15) + +#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) + +#define NHM_DMND_READ (NHM_DMND_DATA_RD) +#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) +#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) + +#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) +#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) +#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) static __initconst const u64 nehalem_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] @@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs { [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, - [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, - [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, - [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, }, } }; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45892dc4b72a..f65e5b521dbd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) unsigned len, type; struct perf_event *bp; + if (ptrace_get_breakpoints(tsk) < 0) + return -ESRCH; + data &= ~DR_CONTROL_RESERVED; old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: @@ -655,6 +658,9 @@ restore: } goto restore; } + + ptrace_put_breakpoints(tsk); + return ((orig_ret < 0) ? orig_ret : rc); } @@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) if (n < HBP_NUM) { struct perf_event *bp; + + if (ptrace_get_breakpoints(tsk) < 0) + return -ESRCH; + bp = thread->ptrace_bps[n]; if (!bp) - return 0; - val = bp->hw.info.address; + val = 0; + else + val = bp->hw.info.address; + + ptrace_put_breakpoints(tsk); } else if (n == 6) { val = thread->debugreg6; } else if (n == 7) { @@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, struct perf_event *bp; struct thread_struct *t = &tsk->thread; struct perf_event_attr attr; + int err = 0; + + if (ptrace_get_breakpoints(tsk) < 0) + return -ESRCH; if (!t->ptrace_bps[nr]) { ptrace_breakpoint_init(&attr); @@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, * writing for the user. And anyway this is the previous * behaviour. */ - if (IS_ERR(bp)) - return PTR_ERR(bp); + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + goto put; + } t->ptrace_bps[nr] = bp; } else { - int err; - bp = t->ptrace_bps[nr]; attr = bp->attr; attr.bp_addr = addr; err = modify_user_hw_breakpoint(bp, &attr); - if (err) - return err; } - - return 0; +put: + ptrace_put_breakpoints(tsk); + return err; } /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b819a8b..d6e2477feb18 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -73,9 +73,14 @@ #define MemAbs (1<<11) /* Memory operand is absolute displacement */ #define String (1<<12) /* String instruction (rep capable) */ #define Stack (1<<13) /* Stack instruction (push/pop) */ +#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ -#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ +#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ +#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ +#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ +#define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ +#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ @@ -102,11 +107,14 @@ struct opcode { u32 flags; + u8 intercept; union { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; struct group_dual *gdual; + struct gprefix *gprefix; } u; + int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; struct group_dual { @@ -114,6 +122,13 @@ struct group_dual { struct opcode mod3[8]; }; +struct gprefix { + struct opcode pfx_no; + struct opcode pfx_66; + struct opcode pfx_f2; + struct opcode pfx_f3; +}; + /* EFLAGS bit definitions. */ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -248,42 +263,42 @@ struct group_dual { "w", "r", _LO32, "r", "", "r") /* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) #define __emulate_1op(_op, _dst, _eflags, _suffix) \ @@ -346,13 +361,25 @@ struct group_dual { } while (0) /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ - do { \ - switch((_src).bytes) { \ - case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ - case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ - case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ +#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ + do { \ + switch((_src).bytes) { \ + case 1: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "b"); \ + break; \ + case 2: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "w"); \ + break; \ + case 4: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "l"); \ + break; \ + case 8: \ + ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "q")); \ + break; \ } \ } while (0) @@ -388,13 +415,33 @@ struct group_dual { (_type)_x; \ }) -#define insn_fetch_arr(_arr, _size, _eip) \ +#define insn_fetch_arr(_arr, _size, _eip) \ ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ (_eip) += (_size); \ }) +static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + struct x86_instruction_info info = { + .intercept = intercept, + .rep_prefix = ctxt->decode.rep_prefix, + .modrm_mod = ctxt->decode.modrm_mod, + .modrm_reg = ctxt->decode.modrm_reg, + .modrm_rm = ctxt->decode.modrm_rm, + .src_val = ctxt->decode.src.val64, + .src_bytes = ctxt->decode.src.bytes, + .dst_bytes = ctxt->decode.dst.bytes, + .ad_bytes = ctxt->decode.ad_bytes, + .next_rip = ctxt->eip, + }; + + return ctxt->ops->intercept(ctxt, &info, stage); +} + static inline unsigned long ad_mask(struct decode_cache *c) { return (1UL << (c->ad_bytes << 3)) - 1; @@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static u32 desc_limit_scaled(struct desc_struct *desc) +{ + u32 limit = get_desc_limit(desc); + + return desc->g ? (limit << 12) | 0xfff : limit; +} + static void set_seg_override(struct decode_cache *c, int seg) { c->has_seg_override = true; @@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) return 0; - return ops->get_cached_segment_base(seg, ctxt->vcpu); + return ops->get_cached_segment_base(ctxt, seg); } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct decode_cache *c) { if (!c->has_seg_override) @@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static ulong linear(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr) -{ - struct decode_cache *c = &ctxt->decode; - ulong la; - - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; - if (c->ad_bytes != 8) - la &= (u32)-1; - return la; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, return X86EMUL_PROPAGATE_FAULT; } +static int emulate_db(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, DB_VECTOR, 0, false); +} + static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) { return emulate_exception(ctxt, GP_VECTOR, err, true); } +static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) +{ + return emulate_exception(ctxt, SS_VECTOR, err, true); +} + static int emulate_ud(struct x86_emulate_ctxt *ctxt) { return emulate_exception(ctxt, UD_VECTOR, 0, false); @@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, DE_VECTOR, 0, false); } +static int emulate_nm(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, NM_VECTOR, 0, false); +} + +static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) +{ + u16 selector; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); + return selector; +} + +static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, + unsigned seg) +{ + u16 dummy; + u32 base3; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); +} + +static int __linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, bool fetch, + ulong *linear) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_struct desc; + bool usable; + ulong la; + u32 lim; + u16 sel; + unsigned cpl, rpl; + + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + switch (ctxt->mode) { + case X86EMUL_MODE_REAL: + break; + case X86EMUL_MODE_PROT64: + if (((signed long)la << 16) >> 16 != la) + return emulate_gp(ctxt, 0); + break; + default: + usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, + addr.seg); + if (!usable) + goto bad; + /* code segment or read-only data segment */ + if (((desc.type & 8) || !(desc.type & 2)) && write) + goto bad; + /* unreadable code segment */ + if (!fetch && (desc.type & 8) && !(desc.type & 2)) + goto bad; + lim = desc_limit_scaled(&desc); + if ((desc.type & 8) || !(desc.type & 4)) { + /* expand-up segment */ + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } else { + /* exapand-down segment */ + if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) + goto bad; + lim = desc.d ? 0xffffffff : 0xffff; + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } + cpl = ctxt->ops->cpl(ctxt); + rpl = sel & 3; + cpl = max(cpl, rpl); + if (!(desc.type & 8)) { + /* data segment */ + if (cpl > desc.dpl) + goto bad; + } else if ((desc.type & 8) && !(desc.type & 4)) { + /* nonconforming code segment */ + if (cpl != desc.dpl) + goto bad; + } else if ((desc.type & 8) && (desc.type & 4)) { + /* conforming code segment */ + if (cpl < desc.dpl) + goto bad; + } + break; + } + if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) + la &= (u32)-1; + *linear = la; + return X86EMUL_CONTINUE; +bad: + if (addr.seg == VCPU_SREG_SS) + return emulate_ss(ctxt, addr.seg); + else + return emulate_gp(ctxt, addr.seg); +} + +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + return __linearize(ctxt, addr, size, write, false, linear); +} + + +static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { + unsigned long linear; + struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); - rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, - size, ctxt->vcpu, &ctxt->exception); + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + rc = ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; fc->end += size; @@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, } static int read_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct segmented_address addr, u16 *size, unsigned long *address, int op_bytes) { @@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, size, 2); if (rc != X86EMUL_CONTINUE) return rc; addr.ea += 2; - rc = ops->read_std(linear(ctxt, addr), address, op_bytes, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, address, op_bytes); return rc; } @@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op) } } -static void decode_register_operand(struct operand *op, +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, struct decode_cache *c, int inhibit_bytereg) { @@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op, if (!(c->d & ModRM)) reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); + + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = reg; + read_sse_reg(ctxt, &op->vec_val, reg); + return; + } + op->type = OP_REG; if ((c->d & ByteOp) && !inhibit_bytereg) { op->addr.reg = decode_register(reg, c->regs, highbyte_regs); @@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; op->addr.reg = decode_register(c->modrm_rm, c->regs, c->d & ByteOp); + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = c->modrm_rm; + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); + return rc; + } fetch_register_operand(op); return rc; } @@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - rc = ops->read_emulated(addr, mc->data + mc->end, n, - &ctxt->exception, ctxt->vcpu); + rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, + &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; mc->end += n; @@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +static int segmented_read(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return read_emulated(ctxt, ctxt->ops, linear, data, size); +} + +static int segmented_write(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_emulated(ctxt, linear, data, size, + &ctxt->exception); +} + +static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *orig_data, const void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, + size, &ctxt->exception); +} + static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned int size, unsigned short port, @@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (n == 0) n = 1; rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) + if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) return 0; rc->end = n * size; } @@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, return 1; } -static u32 desc_limit_scaled(struct desc_struct *desc) -{ - u32 limit = get_desc_limit(desc); - - return desc->g ? (limit << 12) | 0xfff : limit; -} - static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 selector, struct desc_ptr *dt) { if (selector & 1 << 2) { struct desc_struct desc; + u16 sel; + memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, - ctxt->vcpu)) + if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ dt->address = get_desc_base(&desc); } else - ops->get_gdt(dt, ctxt->vcpu); + ops->get_gdt(ctxt, dt); } /* allowed just for 8 bytes segments */ @@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (dt.size < index * 8 + 7) return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt->vcpu); + cpl = ops->cpl(ctxt); switch (seg) { case VCPU_SREG_SS: @@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(selector, seg, ctxt->vcpu); - ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); + ops->set_segment(ctxt, selector, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op) } } -static inline int writeback(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int writeback(struct x86_emulate_ctxt *ctxt) { int rc; struct decode_cache *c = &ctxt->decode; @@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, break; case OP_MEM: if (c->lock_prefix) - rc = ops->cmpxchg_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.orig_val, - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_cmpxchg(ctxt, + c->dst.addr.mem, + &c->dst.orig_val, + &c->dst.val, + c->dst.bytes); else - rc = ops->write_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_write(ctxt, + c->dst.addr.mem, + &c->dst.val, + c->dst.bytes); if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_XMM: + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); + break; case OP_NONE: /* no writeback */ break; @@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline void emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + struct segmented_address addr; - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); - c->dst.addr.mem.seg = VCPU_SREG_SS; + addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); + addr.seg = VCPU_SREG_SS; + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, void *dest, int len) { struct decode_cache *c = &ctxt->decode; @@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); addr.seg = VCPU_SREG_SS; - rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); + rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_pop(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + return emulate_pop(ctxt, &c->dst.val, c->op_bytes); +} + static int emulate_popf(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, void *dest, int len) @@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt->vcpu); + int cpl = ops->cpl(ctxt); - rc = emulate_pop(ctxt, ops, &val, len); + rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } -static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int em_popf(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); + c->dst.type = OP_REG; + c->dst.addr.reg = &ctxt->eflags; + c->dst.bytes = c->op_bytes; + return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} - emulate_push(ctxt, ops); +static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, int seg) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = get_segment_selector(ctxt, seg); + + return em_push(ctxt); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, unsigned long selector; int rc; - rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); + rc = emulate_pop(ctxt, &selector, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - emulate_push(ctxt, ops); - - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ++reg; } - /* Disable writeback. */ - c->dst.type = OP_NONE; - return rc; } -static int emulate_popa(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_pushf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = (unsigned long)ctxt->eflags; + return em_push(ctxt); +} + +static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, --reg; } - rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); + rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; - c->dst.type = OP_NONE; - - ops->get_idt(&dt, ctxt->vcpu); + ops->get_idt(ctxt, &dt); eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; - rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; @@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add stack limit check */ - rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, if (temp_eip & ~0xffff) return emulate_gp(ctxt, 0); - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, } } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_jmp_far(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + unsigned short sel; + + memcpy(&sel, c->src.valptr + c->op_bytes, 2); + + rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); + if (rc != X86EMUL_CONTINUE) + return rc; + + c->eip = 0; + memcpy(&c->eip, c->src.valptr, c->op_bytes); + return X86EMUL_CONTINUE; +} + +static int em_grp1a(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); + return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); } -static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) +static int em_grp2(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; switch (c->modrm_reg) { @@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); break; } + return X86EMUL_CONTINUE; } -static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp3(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long *rax = &c->regs[VCPU_REGS_RAX]; @@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp45(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + int rc = X86EMUL_CONTINUE; switch (c->modrm_reg) { case 0: /* inc */ @@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; + case 5: /* jmp far */ + rc = em_jmp_far(ctxt); + break; case 6: /* push */ - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } - return X86EMUL_CONTINUE; + return rc; } -static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp9(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; u64 old = c->dst.orig_val64; @@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, int rc; unsigned long cs; - rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); + rc = emulate_pop(ctxt, &c->eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; if (c->op_bytes == 4) c->eip = (u32)c->eip; - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); @@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, struct desc_struct *cs, struct desc_struct *ss) { + u16 selector; + memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); + ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; /* syscall is not available in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_EFER, &efer); setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); ss_sel = (u16)(msr_data + 8); - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; - ops->get_msr(ctxt->vcpu, + ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? MSR_LSTAR : MSR_CSTAR, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); + ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~(msr_data | EFLG_RF); #endif } else { /* legacy mode */ - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); c->eip = (u32)msr_data; ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) return emulate_gp(ctxt, 0); @@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (ctxt->mode) { case X86EMUL_MODE_PROT32: if ((msr_data & 0xfffc) == 0x0) @@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 - || is_long_mode(ctxt->vcpu)) { + if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); c->regs[VCPU_REGS_RSP] = msr_data; return X86EMUL_CONTINUE; @@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.dpl = 3; ss.dpl = 3; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (usermode) { case X86EMUL_MODE_PROT32: cs_sel = (u16)(msr_data + 16); @@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt->vcpu) > iopl; + return ops->cpl(ctxt) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, struct desc_struct tr_seg; u32 base3; int r; - u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; + u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 base |= ((u64)base3) << 32; #endif - r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) return false; - r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, - NULL); + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if ((perm >> bit_idx) & mask) @@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. If fault happenes at this stage @@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, save_state_to_tss16(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - tss->cr3 = ops->get_cr(3, ctxt->vcpu); + tss->cr3 = ops->get_cr(ctxt, 3); tss->eip = c->eip; tss->eflags = ctxt->eflags; tss->eax = c->regs[VCPU_REGS_RAX]; @@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int ret; - if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) + if (ops->set_cr(ctxt, 3, tss->cr3)) return emulate_gp(ctxt, 0); c->eip = tss->eip; ctxt->eflags = tss->eflags | 2; @@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); + set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. If fault happenes at this stage @@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, save_state_to_tss32(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); + u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = - ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; /* FIXME: old_tss_base == ~0 ? */ @@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason != TASK_SWITCH_IRET) { if ((tss_selector & 3) > next_tss_desc.dpl || - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) + ops->cpl(ctxt) > next_tss_desc.dpl) return emulate_gp(ctxt, 0); } @@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, &next_tss_desc); } - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); - ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); + ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); + ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - emulate_push(ctxt, ops); + ret = em_push(ctxt); } return ret; @@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) { - rc = writeback(ctxt, ops); - if (rc == X86EMUL_CONTINUE) - ctxt->eip = c->eip; - } + if (rc == X86EMUL_CONTINUE) + ctxt->eip = c->eip; - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, @@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, op->addr.mem.seg = seg; } -static int em_push(struct x86_emulate_ctxt *ctxt) -{ - emulate_push(ctxt, ctxt->ops); - return X86EMUL_CONTINUE; -} - static int em_das(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); - if (rc != X86EMUL_CONTINUE) - return rc; - - c->dst.type = OP_NONE; - - return X86EMUL_CONTINUE; + return em_push(ctxt); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); + rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); return X86EMUL_CONTINUE; } +static int em_add(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_or(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_adc(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sbb(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_and(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sub(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_xor(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_cmp(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static int em_imul(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { - unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); struct decode_cache *c = &ctxt->decode; u64 tsc = 0; - if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) - return emulate_gp(ctxt, 0); - ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); + ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; return X86EMUL_CONTINUE; @@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movdqu(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); + return X86EMUL_CONTINUE; +} + +static int em_invlpg(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + ulong linear; + + rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->ops->invlpg(ctxt, linear); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_clts(struct x86_emulate_ctxt *ctxt) +{ + ulong cr0; + + cr0 = ctxt->ops->get_cr(ctxt, 0); + cr0 &= ~X86_CR0_TS; + ctxt->ops->set_cr(ctxt, 0, cr0); + return X86EMUL_CONTINUE; +} + +static int em_vmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return X86EMUL_UNHANDLEABLE; + + rc = ctxt->ops->fix_hypercall(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* Let the processor re-execute the fixed hypercall */ + c->eip = ctxt->eip; + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_gdt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_vmmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + rc = ctxt->ops->fix_hypercall(ctxt); + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return rc; +} + +static int em_lidt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_idt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_smsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = 2; + c->dst.val = ctxt->ops->get_cr(ctxt, 0); + return X86EMUL_CONTINUE; +} + +static int em_lmsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) + | (c->src.val & 0x0f)); + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static bool valid_cr(int nr) +{ + switch (nr) { + case 0: + case 2 ... 4: + case 8: + return true; + default: + return false; + } +} + +static int check_cr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + if (!valid_cr(c->modrm_reg)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_cr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int cr = c->modrm_reg; + u64 efer = 0; + + static u64 cr_reserved_bits[] = { + 0xffffffff00000000ULL, + 0, 0, 0, /* CR3 checked later */ + CR4_RESERVED_BITS, + 0, 0, 0, + CR8_RESERVED_BITS, + }; + + if (!valid_cr(cr)) + return emulate_ud(ctxt); + + if (new_val & cr_reserved_bits[cr]) + return emulate_gp(ctxt, 0); + + switch (cr) { + case 0: { + u64 cr4; + if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || + ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) + return emulate_gp(ctxt, 0); + + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && + !(cr4 & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + case 3: { + u64 rsvd = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + rsvd = CR3_L_MODE_RESERVED_BITS; + else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) + rsvd = CR3_PAE_RESERVED_BITS; + else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) + rsvd = CR3_NONPAE_RESERVED_BITS; + + if (new_val & rsvd) + return emulate_gp(ctxt, 0); + + break; + } + case 4: { + u64 cr4; + + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + } + + return X86EMUL_CONTINUE; +} + +static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) +{ + unsigned long dr7; + + ctxt->ops->get_dr(ctxt, 7, &dr7); + + /* Check if DR7.Global_Enable is set */ + return dr7 & (1 << 13); +} + +static int check_dr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int dr = c->modrm_reg; + u64 cr4; + + if (dr > 7) + return emulate_ud(ctxt); + + cr4 = ctxt->ops->get_cr(ctxt, 4); + if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) + return emulate_ud(ctxt); + + if (check_dr7_gd(ctxt)) + return emulate_db(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_dr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int dr = c->modrm_reg; + + if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) + return emulate_gp(ctxt, 0); + + return check_dr_read(ctxt); +} + +static int check_svme(struct x86_emulate_ctxt *ctxt) +{ + u64 efer; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if (!(efer & EFER_SVME)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_svme_pa(struct x86_emulate_ctxt *ctxt) +{ + u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; + + /* Valid physical address? */ + if (rax & 0xffff000000000000ULL) + return emulate_gp(ctxt, 0); + + return check_svme(ctxt); +} + +static int check_rdtsc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); + + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_rdpmc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); + u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; + + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || + (rcx > 3)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_in(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_out(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.bytes = min(c->src.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } +#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define N D(0) +#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } -#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } +#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define II(_f, _e, _i) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } +#define IIP(_f, _e, _i, _p) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } +#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) +#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) -#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ - D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ - D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +static struct opcode group7_rm1[] = { + DI(SrcNone | ModRM | Priv, monitor), + DI(SrcNone | ModRM | Priv, mwait), + N, N, N, N, N, N, +}; + +static struct opcode group7_rm3[] = { + DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), + II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), + DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), +}; + +static struct opcode group7_rm7[] = { + N, + DIP(SrcNone | ModRM, rdtscp, check_rdtsc), + N, N, N, N, N, N, +}; static struct opcode group1[] = { - X7(D(Lock)), N + I(Lock, em_add), + I(Lock, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(0, em_cmp), }; static struct opcode group1A[] = { @@ -2366,16 +3073,28 @@ static struct opcode group5[] = { D(SrcMem | ModRM | Stack), N, }; +static struct opcode group6[] = { + DI(ModRM | Prot, sldt), + DI(ModRM | Prot, str), + DI(ModRM | Prot | Priv, lldt), + DI(ModRM | Prot | Priv, ltr), + N, N, N, N, +}; + static struct group_dual group7 = { { - N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), - D(SrcMem | ModRM | ByteOp | Priv | NoAccess), + DI(ModRM | Mov | DstMem | Priv, sgdt), + DI(ModRM | Mov | DstMem | Priv, sidt), + II(ModRM | SrcMem | Priv, em_lgdt, lgdt), + II(ModRM | SrcMem | Priv, em_lidt, lidt), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), + II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), N, - N, D(SrcNone | ModRM | Priv | VendorSpecific), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), N, + I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), + EXT(0, group7_rm1), + N, EXT(0, group7_rm3), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { @@ -2394,35 +3113,40 @@ static struct opcode group11[] = { I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), }; +static struct gprefix pfx_0f_6f_0f_7f = { + N, N, N, I(Sse, em_movdqu), +}; + static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - D6ALU(Lock), + I6ALU(Lock, em_add), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x08 - 0x0F */ - D6ALU(Lock), + I6ALU(Lock, em_or), D(ImplicitOps | Stack | No64), N, /* 0x10 - 0x17 */ - D6ALU(Lock), + I6ALU(Lock, em_adc), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x18 - 0x1F */ - D6ALU(Lock), + I6ALU(Lock, em_sbb), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x20 - 0x27 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_and), N, N, /* 0x28 - 0x2F */ - D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - D6ALU(0), N, N, + I6ALU(0, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ - X8(D(DstReg | Stack)), + X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ - D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), + I(ImplicitOps | Stack | No64, em_pusha), + I(ImplicitOps | Stack | No64, em_popa), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , N, N, N, N, /* 0x68 - 0x6F */ @@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bv(DstDI | Mov | String), /* insb, insw/insd */ - D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ + D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = { D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), /* 0x90 - 0x97 */ - X8(D(SrcAcc | DstReg)), + DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, + II(ImplicitOps | Stack, em_pushf, pushf), + II(ImplicitOps | Stack, em_popf, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - D2bv(SrcSI | DstDI | String), + I2bv(SrcSI | DstDI | String, em_cmp), /* 0xA8 - 0xAF */ D2bv(DstAcc | SrcImm), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - D2bv(SrcAcc | DstDI | String), + I2bv(SrcAcc | DstDI | String, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ @@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ N, N, N, D(ImplicitOps | Stack), - D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), + D(ImplicitOps), DI(SrcImmByte, intn), + D(ImplicitOps | No64), DI(ImplicitOps, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, N, N, N, @@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = { N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ X4(D(SrcImmByte)), - D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), + D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), + D2bvIP(SrcNone | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), /* 0xF0 - 0xF7 */ - N, N, N, N, - D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), + N, DI(ImplicitOps, icebp), N, N, + DI(ImplicitOps | Priv, hlt), D(ImplicitOps), + G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), @@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ - N, GD(0, &group7), N, N, - N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, - D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, + G(0, group6), GD(0, &group7), N, N, + N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ - D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), - D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), + DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), + DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), + DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), - D(ImplicitOps | Priv), N, + DI(ImplicitOps | Priv, wrmsr), + IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), + DI(ImplicitOps | Priv, rdmsr), + DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, N, N, N, N, N, N, N, N, @@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = { /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x70 - 0x7F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ X16(D(SrcImm)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp), + DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), N, N, /* 0xA8 - 0xAF */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp | Lock), + DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), @@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = { #undef G #undef GD #undef I +#undef GP +#undef EXT #undef D2bv +#undef D2bvIP #undef I2bv -#undef D6ALU +#undef I6ALU static unsigned imm_size(struct decode_cache *c) { @@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset; - struct opcode opcode, *g_mod012, *g_mod3; + int def_op_bytes, def_ad_bytes, goffset, simd_prefix; + bool op_prefix = false; + struct opcode opcode; struct operand memop = { .type = OP_NONE }; c->eip = ctxt->eip; @@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->fetch.end = c->fetch.start + insn_len; if (insn_len > 0) memcpy(c->fetch.data, insn, insn_len); - ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { case X86EMUL_MODE_REAL: @@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) for (;;) { switch (c->b = insn_fetch(u8, 1, c->eip)) { case 0x66: /* operand-size override */ + op_prefix = true; /* switch between 2/4 bytes */ c->op_bytes = def_op_bytes ^ 6; break; @@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->lock_prefix = 1; break; case 0xf2: /* REPNE/REPNZ */ - c->rep_prefix = REPNE_PREFIX; - break; case 0xf3: /* REP/REPE/REPZ */ - c->rep_prefix = REPE_PREFIX; + c->rep_prefix = c->b; break; default: goto done_prefixes; @@ -2722,29 +3463,49 @@ done_prefixes: } c->d = opcode.flags; - if (c->d & Group) { - dual = c->d & GroupDual; - c->modrm = insn_fetch(u8, 1, c->eip); - --c->eip; - - if (c->d & GroupDual) { - g_mod012 = opcode.u.gdual->mod012; - g_mod3 = opcode.u.gdual->mod3; - } else - g_mod012 = g_mod3 = opcode.u.group; - - c->d &= ~(Group | GroupDual); - - goffset = (c->modrm >> 3) & 7; + while (c->d & GroupMask) { + switch (c->d & GroupMask) { + case Group: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + opcode = opcode.u.group[goffset]; + break; + case GroupDual: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + if ((c->modrm >> 6) == 3) + opcode = opcode.u.gdual->mod3[goffset]; + else + opcode = opcode.u.gdual->mod012[goffset]; + break; + case RMExt: + goffset = c->modrm & 7; + opcode = opcode.u.group[goffset]; + break; + case Prefix: + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + break; + default: + return X86EMUL_UNHANDLEABLE; + } - if ((c->modrm >> 6) == 3) - opcode = g_mod3[goffset]; - else - opcode = g_mod012[goffset]; + c->d &= ~GroupMask; c->d |= opcode.flags; } c->execute = opcode.u.execute; + c->check_perm = opcode.check_perm; + c->intercept = opcode.intercept; /* Unrecognised? */ if (c->d == 0 || (c->d & Undefined)) @@ -2763,6 +3524,9 @@ done_prefixes: c->op_bytes = 4; } + if (c->d & Sse) + c->op_bytes = 16; + /* ModRM and SIB bytes. */ if (c->d & ModRM) { rc = decode_modrm(ctxt, ops, &memop); @@ -2776,7 +3540,7 @@ done_prefixes: if (!c->has_seg_override) set_seg_override(c, VCPU_SREG_DS); - memop.addr.mem.seg = seg_override(ctxt, ops, c); + memop.addr.mem.seg = seg_override(ctxt, c); if (memop.type == OP_MEM && c->ad_bytes != 8) memop.addr.mem.ea = (u32)memop.addr.mem.ea; @@ -2792,7 +3556,7 @@ done_prefixes: case SrcNone: break; case SrcReg: - decode_register_operand(&c->src, c, 0); + decode_register_operand(ctxt, &c->src, c, 0); break; case SrcMem16: memop.bytes = 2; @@ -2836,7 +3600,7 @@ done_prefixes: c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->src.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSI]); - c->src.addr.mem.seg = seg_override(ctxt, ops, c), + c->src.addr.mem.seg = seg_override(ctxt, c); c->src.val = 0; break; case SrcImmFAddr: @@ -2883,7 +3647,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ switch (c->d & DstMask) { case DstReg: - decode_register_operand(&c->dst, c, + decode_register_operand(ctxt, &c->dst, c, c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstImmUByte: @@ -2926,7 +3690,7 @@ done_prefixes: } done: - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) @@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if ((c->d & Sse) + && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) + || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); + goto done; + } + + if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); + goto done; + } + + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { + if ((c->d & Priv) && ops->cpl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } + /* Instruction can only be executed in protected mode */ + if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + rc = emulate_ud(ctxt); + goto done; + } + + /* Do instruction specific permission checks */ + if (c->check_perm) { + rc = c->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->rep_prefix && (c->d & String)) { /* All REP prefixes have the same first termination condition */ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { @@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), - c->src.valptr, c->src.bytes); + rc = segmented_read(ctxt, c->src.addr.mem, + c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), - &c->src2.val, c->src2.bytes); + rc = segmented_read(ctxt, c->src2.addr.mem, + &c->src2.val, c->src2.bytes); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { /* optimisation - avoid slow emulated read if Mov */ - rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), + rc = segmented_read(ctxt, c->dst.addr.mem, &c->dst.val, c->dst.bytes); if (rc != X86EMUL_CONTINUE) goto done; @@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->execute) { rc = c->execute(ctxt); if (rc != X86EMUL_CONTINUE) @@ -3034,75 +3844,33 @@ special_insn: goto twobyte_insn; switch (c->b) { - case 0x00 ... 0x05: - add: /* add */ - emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); - break; case 0x06: /* push es */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); break; - case 0x08 ... 0x0d: - or: /* or */ - emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); - break; case 0x0e: /* push cs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); - break; - case 0x10 ... 0x15: - adc: /* adc */ - emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; case 0x16: /* push ss */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); break; - case 0x18 ... 0x1d: - sbb: /* sbb */ - emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); - break; case 0x1e: /* push ds */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); break; - case 0x20 ... 0x25: - and: /* and */ - emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); - break; - case 0x28 ... 0x2d: - sub: /* sub */ - emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); - break; - case 0x30 ... 0x35: - xor: /* xor */ - emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); - break; - case 0x38 ... 0x3d: - cmp: /* cmp */ - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - break; case 0x40 ... 0x47: /* inc r16/r32 */ emulate_1op("inc", c->dst, ctxt->eflags); break; case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x58 ... 0x5f: /* pop reg */ - pop_instruction: - rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - break; - case 0x60: /* pusha */ - rc = emulate_pusha(ctxt, ops); - break; - case 0x61: /* popa */ - rc = emulate_popa(ctxt, ops); - break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; @@ -3121,26 +3889,6 @@ special_insn: if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); break; - case 0x80 ... 0x83: /* Grp1 */ - switch (c->modrm_reg) { - case 0: - goto add; - case 1: - goto or; - case 2: - goto adc; - case 3: - goto sbb; - case 4: - goto and; - case 5: - goto sub; - case 6: - goto xor; - case 7: - goto cmp; - } - break; case 0x84 ... 0x85: test: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); @@ -3162,7 +3910,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); + c->dst.val = get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; @@ -3187,7 +3935,7 @@ special_insn: break; } case 0x8f: /* pop (sole member of Grp1a) */ - rc = emulate_grp1a(ctxt, ops); + rc = em_grp1a(ctxt); break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) @@ -3200,31 +3948,17 @@ special_insn: case 8: c->dst.val = (s32)c->dst.val; break; } break; - case 0x9c: /* pushf */ - c->src.val = (unsigned long) ctxt->eflags; - emulate_push(ctxt, ops); - break; - case 0x9d: /* popf */ - c->dst.type = OP_REG; - c->dst.addr.reg = &ctxt->eflags; - c->dst.bytes = c->op_bytes; - rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); - break; - case 0xa6 ... 0xa7: /* cmps */ - c->dst.type = OP_NONE; /* Disable writeback. */ - goto cmp; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; - case 0xae ... 0xaf: /* scas */ - goto cmp; case 0xc0 ... 0xc1: - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xc3: /* ret */ c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - goto pop_instruction; + rc = em_pop(ctxt); + break; case 0xc4: /* les */ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); break; @@ -3252,11 +3986,11 @@ special_insn: rc = emulate_iret(ctxt, ops); break; case 0xd0 ... 0xd1: /* Grp2 */ - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xd2 ... 0xd3: /* Grp2 */ c->src.val = c->regs[VCPU_REGS_RCX]; - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); @@ -3278,23 +4012,14 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 0xe9: /* jmp rel */ goto jmp; - case 0xea: { /* jmp far */ - unsigned short sel; - jump_far: - memcpy(&sel, c->src.valptr + c->op_bytes, 2); - - if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) - goto done; - - c->eip = 0; - memcpy(&c->eip, c->src.valptr, c->op_bytes); + case 0xea: /* jmp far */ + rc = em_jmp_far(ctxt); break; - } case 0xeb: jmp: /* jmp rel short */ jmp_rel(c, c->src.val); @@ -3304,11 +4029,6 @@ special_insn: case 0xed: /* in (e/r)ax,dx */ c->src.val = c->regs[VCPU_REGS_RDX]; do_io_in: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val)) goto done; /* IO is needed */ @@ -3317,25 +4037,19 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - c->src.bytes = min(c->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->dst.val, - c->src.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } - ops->pio_out_emulated(c->src.bytes, c->dst.val, - &c->src.val, 1, ctxt->vcpu); + ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, + &c->src.val, 1); c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ - ctxt->vcpu->arch.halt_request = 1; + ctxt->ops->halt(ctxt); break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ ctxt->eflags ^= EFLG_CF; break; case 0xf6 ... 0xf7: /* Grp3 */ - rc = emulate_grp3(ctxt, ops); + rc = em_grp3(ctxt); break; case 0xf8: /* clc */ ctxt->eflags &= ~EFLG_CF; @@ -3366,13 +4080,11 @@ special_insn: ctxt->eflags |= EFLG_DF; break; case 0xfe: /* Grp4 */ - grp45: - rc = emulate_grp45(ctxt, ops); + rc = em_grp45(ctxt); break; case 0xff: /* Grp5 */ - if (c->modrm_reg == 5) - goto jump_far; - goto grp45; + rc = em_grp45(ctxt); + break; default: goto cannot_emulate; } @@ -3381,7 +4093,7 @@ special_insn: goto done; writeback: - rc = writeback(ctxt, ops); + rc = writeback(ctxt); if (rc != X86EMUL_CONTINUE) goto done; @@ -3392,7 +4104,7 @@ writeback: c->dst.type = saved_dst_type; if ((c->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override(ctxt, ops, c), + string_addr_inc(ctxt, seg_override(ctxt, c), VCPU_REGS_RSI, &c->src); if ((c->d & DstMask) == DstDI) @@ -3427,115 +4139,34 @@ writeback: done: if (rc == X86EMUL_PROPAGATE_FAULT) ctxt->have_exception = true; + if (rc == X86EMUL_INTERCEPTED) + return EMULATION_INTERCEPTED; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: switch (c->b) { - case 0x01: /* lgdt, lidt, lmsw */ - switch (c->modrm_reg) { - u16 size; - unsigned long address; - - case 0: /* vmcall */ - if (c->modrm_mod != 3 || c->modrm_rm != 1) - goto cannot_emulate; - - rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - - /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->eip; - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 2: /* lgdt */ - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - realmode_lgdt(ctxt->vcpu, size, address); - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) { - switch (c->modrm_rm) { - case 1: - rc = kvm_fix_hypercall(ctxt->vcpu); - break; - default: - goto cannot_emulate; - } - } else { - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - realmode_lidt(ctxt->vcpu, size, address); - } - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 4: /* smsw */ - c->dst.bytes = 2; - c->dst.val = ops->get_cr(0, ctxt->vcpu); - break; - case 6: /* lmsw */ - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | - (c->src.val & 0x0f), ctxt->vcpu); - c->dst.type = OP_NONE; - break; - case 5: /* not defined */ - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, - linear(ctxt, c->src.addr.mem)); - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - default: - goto cannot_emulate; - } - break; case 0x05: /* syscall */ rc = emulate_syscall(ctxt, ops); break; case 0x06: - emulate_clts(ctxt->vcpu); + rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - kvm_emulate_wbinvd(ctxt->vcpu); + (ctxt->ops->wbinvd)(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - switch (c->modrm_reg) { - case 1: - case 5 ... 7: - case 9 ... 15: - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, c->modrm_reg); break; case 0x21: /* mov from dr to reg */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); + ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); break; case 0x22: /* mov reg, cr */ - if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { + if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3543,16 +4174,9 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - - if (ops->set_dr(c->modrm_reg, c->src.val & + if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? - ~0ULL : ~0U), ctxt->vcpu) < 0) { + ~0ULL : ~0U)) < 0) { /* #UD condition is already handled by the code above */ emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; @@ -3565,7 +4189,7 @@ twobyte_insn: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { + if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3574,7 +4198,7 @@ twobyte_insn: break; case 0x32: /* rdmsr */ - if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { + if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3603,7 +4227,7 @@ twobyte_insn: c->dst.val = test_cc(c->b, ctxt->eflags); break; case 0xa0: /* push fs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); @@ -3620,7 +4244,7 @@ twobyte_insn: emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; case 0xa8: /* push gs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); @@ -3727,7 +4351,7 @@ twobyte_insn: (u64) c->src.val; break; case 0xc7: /* Grp9 (cmpxchg8b) */ - rc = emulate_grp9(ctxt, ops); + rc = em_grp9(ctxt); break; default: goto cannot_emulate; @@ -3739,5 +4363,5 @@ twobyte_insn: goto writeback; cannot_emulate: - return -1; + return EMULATION_FAILED; } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca0b48f..51a97426e791 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -33,7 +33,6 @@ struct kvm_kpit_state { }; struct kvm_pit { - unsigned long base_addresss; struct kvm_io_device dev; struct kvm_io_device speaker_dev; struct kvm *kvm; @@ -51,7 +50,6 @@ struct kvm_pit { #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 #define KVM_PIT_CHANNEL_MASK 0x3 -void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d149410..53e2d084bffb 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm); void kvm_destroy_pic(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -void kvm_pic_clear_isr_ack(struct kvm *kvm); static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) { @@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); -int pit_has_pending_timer(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae7593ee7..28418054b880 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, - const void *pte, unsigned long mmu_seq) + const void *pte) { WARN_ON(1); } @@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, - u64 *spte, - const void *new, unsigned long mmu_seq) + struct kvm_mmu_page *sp, u64 *spte, + const void *new) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) { ++vcpu->kvm->stat.mmu_pde_zapped; @@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); + vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); - unsigned long mmu_seq; u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, invlpg_counter, r, flooded = 0; @@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - spin_lock(&vcpu->kvm->mmu_lock); if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) gentry = 0; @@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (gentry && !((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word)) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, - mmu_seq); + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (!remote_flush && need_remote_flush(entry, *spte)) remote_flush = true; ++spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c6397795d865..e3f81418797e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -78,15 +78,21 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } -static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, +static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t table_gfn, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { pt_element_t ret; pt_element_t *table; struct page *page; + gpa_t gpa; - page = gfn_to_page(kvm, table_gfn); + gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT, + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (gpa == UNMAPPED_GVA) + return -EFAULT; + + page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -117,6 +123,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; + pt_element_t __user *ptep_user; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; @@ -152,6 +159,9 @@ walk: pt_access = ACC_ALL; for (;;) { + gfn_t real_gfn; + unsigned long host_addr; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -160,43 +170,64 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, - offset, sizeof(pte), - PFERR_USER_MASK|PFERR_WRITE_MASK)) { + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (unlikely(real_gfn == UNMAPPED_GVA)) { + present = false; + break; + } + real_gfn = gpa_to_gfn(real_gfn); + + host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + if (unlikely(kvm_is_error_hva(host_addr))) { + present = false; + break; + } + + ptep_user = (pt_element_t __user *)((void *)host_addr + offset); + if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { present = false; break; } trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_gpte(pte)) { + if (unlikely(!is_present_gpte(pte))) { present = false; break; } - if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { + if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, + walker->level))) { rsvd_fault = true; break; } - if (write_fault && !is_writable_pte(pte)) - if (user_fault || is_write_protection(vcpu)) - eperm = true; + if (unlikely(write_fault && !is_writable_pte(pte) + && (user_fault || is_write_protection(vcpu)))) + eperm = true; - if (user_fault && !(pte & PT_USER_MASK)) + if (unlikely(user_fault && !(pte & PT_USER_MASK))) eperm = true; #if PTTYPE == 64 - if (fetch_fault && (pte & PT64_NX_MASK)) + if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) eperm = true; #endif - if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + if (!eperm && !rsvd_fault + && unlikely(!(pte & PT_ACCESSED_MASK))) { + int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, - index, pte, pte|PT_ACCESSED_MASK)) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, + index, pte, pte|PT_ACCESSED_MASK); + if (ret < 0) { + present = false; + break; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_ACCESSED_MASK; } @@ -241,17 +272,21 @@ walk: --walker->level; } - if (!present || eperm || rsvd_fault) + if (unlikely(!present || eperm || rsvd_fault)) goto error; - if (write_fault && !is_dirty_gpte(pte)) { - bool ret; + if (write_fault && unlikely(!is_dirty_gpte(pte))) { + int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte, pte|PT_DIRTY_MASK); - if (ret) + if (ret < 0) { + present = false; + goto error; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_DIRTY_MASK; walker->ptes[walker->level - 1] = pte; @@ -325,7 +360,7 @@ no_present: } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq) + u64 *spte, const void *pte) { pt_element_t gpte; unsigned pte_access; @@ -342,8 +377,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, kvm_release_pfn_clean(pfn); return; } - if (mmu_notifier_retry(vcpu, mmu_seq)) - return; /* * we call mmu_set_spte() with host_writable = true because that diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d583e47..506e4fe23adc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -63,6 +63,10 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL + static bool erratum_383_found __read_mostly; static const u32 host_save_user_msrs[] = { @@ -93,14 +97,6 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; - /* - * If we vmexit during an instruction emulation we need this to restore - * the l1 guest rip after the emulation - */ - unsigned long vmexit_rip; - unsigned long vmexit_rsp; - unsigned long vmexit_rax; - /* cache for intercepts of the guest */ u32 intercept_cr; u32 intercept_dr; @@ -144,8 +140,13 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; u32 apf_reason; + + u64 tsc_ratio; }; +static DEFINE_PER_CPU(u64, current_tsc_ratio); +#define TSC_RATIO_DEFAULT 0x0100000000ULL + #define MSR_INVALID 0xffffffffU static struct svm_direct_access_msrs { @@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -376,7 +378,6 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -static uint32_t svm_features; struct svm_init_data { int cpu; @@ -569,6 +570,10 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + cpu_svm_disable(); } @@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + } + svm_init_erratum_383(); return 0; @@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. + */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void) goto err; } - svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; @@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 __scale_tsc(u64 ratio, u64 tsc) +{ + u64 mult, frac, _tsc; + + mult = ratio >> 32; + frac = ratio & ((1ULL << 32) - 1); + + _tsc = tsc; + _tsc *= mult; + _tsc += (tsc >> 32) * frac; + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; + + return _tsc; +} + +static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 _tsc = tsc; + + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + _tsc = __scale_tsc(svm->tsc_ratio, tsc); + + return _tsc; +} + +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 ratio; + u64 khz; + + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) + return; + + /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ + if (user_tsc_khz == 0) { + vcpu->arch.virtual_tsc_khz = 0; + svm->tsc_ratio = TSC_RATIO_DEFAULT; + return; + } + + khz = user_tsc_khz; + + /* TSC scaling required - calculate ratio */ + ratio = khz << 32; + do_div(ratio, tsc_khz); + + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return; + } + vcpu->arch.virtual_tsc_khz = user_tsc_khz; + svm->tsc_ratio = ratio; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } +static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = svm_scale_tsc(vcpu, native_read_tsc()); + + return target_tsc - tsc; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; save->dr7 = 0x400; - save->rflags = 2; + kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->tsc_ratio = TSC_RATIO_DEFAULT; + err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { + __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_guest_mode(vcpu)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { - svm->nested.vmexit_rip = kvm_rip_read(vcpu); - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - return; - } - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; - nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; @@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - svm->vmcb->save.rflags = hsave->save.rflags; + kvm_set_rflags(&svm->vcpu, hsave->save.rflags); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); @@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.efer = svm->vcpu.arch.efer; hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); hsave->save.cr4 = svm->vcpu.arch.cr4; - hsave->save.rflags = vmcb->save.rflags; + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); hsave->save.rip = kvm_rip_read(&svm->vcpu); hsave->save.rsp = vmcb->save.rsp; hsave->save.rax = vmcb->save.rax; @@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; @@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.ds = nested_vmcb->save.ds; svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; svm->vmcb->save.idtr = nested_vmcb->save.idtr; - svm->vmcb->save.rflags = nested_vmcb->save.rflags; + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); @@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); @@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); @@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +{ + unsigned long cr0 = svm->vcpu.arch.cr0; + bool ret = false; + u64 intercept; + + intercept = svm->nested.intercept; + + if (!is_guest_mode(&svm->vcpu) || + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + return false; + + cr0 &= ~SVM_CR0_SELECTIVE_MASK; + val &= ~SVM_CR0_SELECTIVE_MASK; + + if (cr0 ^ val) { + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); + } + + return ret; +} + #define CR_VALID (1ULL << 63) static int cr_interception(struct vcpu_svm *svm) @@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm) val = kvm_register_read(&svm->vcpu, reg); switch (cr) { case 0: - err = kvm_set_cr0(&svm->vcpu, val); + if (!check_selective_cr0_intercepted(svm, val)) + err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); @@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } -static int cr0_write_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - int r; - - r = cr_interception(svm); - - if (svm->nested.vmexit_rip) { - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); - kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); - svm->nested.vmexit_rip = 0; - } - - return r; -} - static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_TSC: { struct vmcb *vmcb = get_host_vmcb(svm); - *data = vmcb->control.tsc_offset + native_read_tsc(); + *data = vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); + break; } case MSR_STAR: @@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, + [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, @@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_NPF] = pf_interception, }; -void dump_vmcb(struct kvm_vcpu *vcpu) +static void dump_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); - pr_err("exceptions: %08x\n", control->intercept_exceptions); - pr_err("intercepts: %016llx\n", control->intercept); - pr_err("pause filter count: %d\n", control->pause_filter_count); - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); - pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); - pr_err("tsc_offset: %016llx\n", control->tsc_offset); - pr_err("asid: %d\n", control->asid); - pr_err("tlb_ctl: %d\n", control->tlb_ctl); - pr_err("int_ctl: %08x\n", control->int_ctl); - pr_err("int_vector: %08x\n", control->int_vector); - pr_err("int_state: %08x\n", control->int_state); - pr_err("exit_code: %08x\n", control->exit_code); - pr_err("exit_info1: %016llx\n", control->exit_info_1); - pr_err("exit_info2: %016llx\n", control->exit_info_2); - pr_err("exit_int_info: %08x\n", control->exit_int_info); - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); - pr_err("nested_ctl: %lld\n", control->nested_ctl); - pr_err("nested_cr3: %016llx\n", control->nested_cr3); - pr_err("event_inj: %08x\n", control->event_inj); - pr_err("event_inj_err: %08x\n", control->event_inj_err); - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); - pr_err("next_rip: %016llx\n", control->next_rip); + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); + pr_err("%-20s%d\n", "asid:", control->asid); + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); + pr_err("%-20s%08x\n", "int_state:", control->int_state); + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); + pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("VMCB State Save Area:\n"); - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", - save->es.selector, save->es.attrib, - save->es.limit, save->es.base); - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", - save->cs.selector, save->cs.attrib, - save->cs.limit, save->cs.base); - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", - save->ss.selector, save->ss.attrib, - save->ss.limit, save->ss.base); - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", - save->ds.selector, save->ds.attrib, - save->ds.limit, save->ds.base); - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", - save->fs.selector, save->fs.attrib, - save->fs.limit, save->fs.base); - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", - save->gs.selector, save->gs.attrib, - save->gs.limit, save->gs.base); - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->gdtr.selector, save->gdtr.attrib, - save->gdtr.limit, save->gdtr.base); - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->ldtr.selector, save->ldtr.attrib, - save->ldtr.limit, save->ldtr.base); - pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->idtr.selector, save->idtr.attrib, - save->idtr.limit, save->idtr.base); - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", - save->tr.selector, save->tr.attrib, - save->tr.limit, save->tr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "es:", + save->es.selector, save->es.attrib, + save->es.limit, save->es.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "cs:", + save->cs.selector, save->cs.attrib, + save->cs.limit, save->cs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ss:", + save->ss.selector, save->ss.attrib, + save->ss.limit, save->ss.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ds:", + save->ds.selector, save->ds.attrib, + save->ds.limit, save->ds.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "fs:", + save->fs.selector, save->fs.attrib, + save->fs.limit, save->fs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gs:", + save->gs.selector, save->gs.attrib, + save->gs.limit, save->gs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gdtr:", + save->gdtr.selector, save->gdtr.attrib, + save->gdtr.limit, save->gdtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ldtr:", + save->ldtr.selector, save->ldtr.attrib, + save->ldtr.limit, save->ldtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "idtr:", + save->idtr.selector, save->idtr.attrib, + save->idtr.limit, save->idtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "tr:", + save->tr.selector, save->tr.attrib, + save->tr.limit, save->tr.base); pr_err("cpl: %d efer: %016llx\n", save->cpl, save->efer); - pr_err("cr0: %016llx cr2: %016llx\n", - save->cr0, save->cr2); - pr_err("cr3: %016llx cr4: %016llx\n", - save->cr3, save->cr4); - pr_err("dr6: %016llx dr7: %016llx\n", - save->dr6, save->dr7); - pr_err("rip: %016llx rflags: %016llx\n", - save->rip, save->rflags); - pr_err("rsp: %016llx rax: %016llx\n", - save->rsp, save->rax); - pr_err("star: %016llx lstar: %016llx\n", - save->star, save->lstar); - pr_err("cstar: %016llx sfmask: %016llx\n", - save->cstar, save->sfmask); - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", - save->kernel_gs_base, save->sysenter_cs); - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", - save->sysenter_esp, save->sysenter_eip); - pr_err("gpat: %016llx dbgctl: %016llx\n", - save->g_pat, save->dbgctl); - pr_err("br_from: %016llx br_to: %016llx\n", - save->br_from, save->br_to); - pr_err("excp_from: %016llx excp_to: %016llx\n", - save->last_excp_from, save->last_excp_to); - + pr_err("%-15s %016llx %-13s %016llx\n", + "cr0:", save->cr0, "cr2:", save->cr2); + pr_err("%-15s %016llx %-13s %016llx\n", + "cr3:", save->cr3, "cr4:", save->cr4); + pr_err("%-15s %016llx %-13s %016llx\n", + "dr6:", save->dr6, "dr7:", save->dr7); + pr_err("%-15s %016llx %-13s %016llx\n", + "rip:", save->rip, "rflags:", save->rflags); + pr_err("%-15s %016llx %-13s %016llx\n", + "rsp:", save->rsp, "rax:", save->rax); + pr_err("%-15s %016llx %-13s %016llx\n", + "star:", save->star, "lstar:", save->lstar); + pr_err("%-15s %016llx %-13s %016llx\n", + "cstar:", save->cstar, "sfmask:", save->sfmask); + pr_err("%-15s %016llx %-13s %016llx\n", + "kernel_gs_base:", save->kernel_gs_base, + "sysenter_cs:", save->sysenter_cs); + pr_err("%-15s %016llx %-13s %016llx\n", + "sysenter_esp:", save->sysenter_esp, + "sysenter_eip:", save->sysenter_eip); + pr_err("%-15s %016llx %-13s %016llx\n", + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); + pr_err("%-15s %016llx %-13s %016llx\n", + "br_from:", save->br_from, "br_to:", save->br_to); + pr_err("%-15s %016llx %-13s %016llx\n", + "excp_from:", save->last_excp_from, + "excp_to:", save->last_excp_to); } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) @@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); @@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, } +#define POST_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_EXCEPT, } +#define POST_MEM(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_MEMACCESS, } + +static struct __x86_intercept { + u32 exit_code; + enum x86_intercept_stage stage; +} x86_intercept_map[] = { + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), + [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), +}; + +#undef PRE_EX +#undef POST_EX +#undef POST_MEM + +static int svm_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int vmexit, ret = X86EMUL_CONTINUE; + struct __x86_intercept icpt_info; + struct vmcb *vmcb = svm->vmcb; + + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) + goto out; + + icpt_info = x86_intercept_map[info->intercept]; + + if (stage != icpt_info.stage) + goto out; + + switch (icpt_info.exit_code) { + case SVM_EXIT_READ_CR0: + if (info->intercept == x86_intercept_cr_read) + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_WRITE_CR0: { + unsigned long cr0, val; + u64 intercept; + + if (info->intercept == x86_intercept_cr_write) + icpt_info.exit_code += info->modrm_reg; + + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + break; + + intercept = svm->nested.intercept; + + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + break; + + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + + if (info->intercept == x86_intercept_lmsw) { + cr0 &= 0xfUL; + val &= 0xfUL; + /* lmsw can't clear PE - catch this here */ + if (cr0 & X86_CR0_PE) + val |= X86_CR0_PE; + } + + if (cr0 ^ val) + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + + break; + } + case SVM_EXIT_READ_DR0: + case SVM_EXIT_WRITE_DR0: + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_MSR: + if (info->intercept == x86_intercept_wrmsr) + vmcb->control.exit_info_1 = 1; + else + vmcb->control.exit_info_1 = 0; + break; + case SVM_EXIT_PAUSE: + /* + * We get this for NOP only, but pause + * is rep not, check this here + */ + if (info->rep_prefix != REPE_PREFIX) + goto out; + case SVM_EXIT_IOIO: { + u64 exit_info; + u32 bytes; + + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + exit_info |= SVM_IOIO_TYPE_MASK; + bytes = info->src_bytes; + } else { + bytes = info->dst_bytes; + } + + if (info->intercept == x86_intercept_outs || + info->intercept == x86_intercept_ins) + exit_info |= SVM_IOIO_STR_MASK; + + if (info->rep_prefix) + exit_info |= SVM_IOIO_REP_MASK; + + bytes = min(bytes, 4u); + + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; + + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); + + vmcb->control.exit_info_1 = exit_info; + vmcb->control.exit_info_2 = info->next_rip; + + break; + } + default: + break; + } + + vmcb->control.next_rip = info->next_rip; + vmcb->control.exit_code = icpt_info.exit_code; + vmexit = nested_svm_exit_handled(svm); + + ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED + : X86EMUL_CONTINUE; + +out: + return ret; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + .compute_tsc_offset = svm_compute_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, + + .check_intercept = svm_check_intercept, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcbd154c..4c3fa0f67469 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -128,8 +128,11 @@ struct vcpu_vmx { unsigned long host_rsp; int launched; u8 fail; + u8 cpl; + bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; + ulong rflags; struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; @@ -159,6 +162,10 @@ struct vcpu_vmx { u32 ar; } tr, es, ds, fs, gs; } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ + struct kvm_save_segment seg[8]; + } segment_cache; int vpid; bool emulation_required; @@ -171,6 +178,15 @@ struct vcpu_vmx { bool rdtscp_enabled; }; +enum segment_cache_field { + SEG_FIELD_SEL = 0, + SEG_FIELD_BASE = 1, + SEG_FIELD_LIMIT = 2, + SEG_FIELD_AR = 3, + + SEG_FIELD_NR = 4 +}; + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + +static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, + unsigned field) +{ + bool ret; + u32 mask = 1 << (seg * SEG_FIELD_NR + field); + + if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { + vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + vmx->segment_cache.bitmask = 0; + } + ret = vmx->segment_cache.bitmask & mask; + vmx->segment_cache.bitmask |= mask; + return ret; +} + +static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) +{ + u16 *p = &vmx->segment_cache.seg[seg].selector; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); + return *p; +} + +static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) +{ + ulong *p = &vmx->segment_cache.seg[seg].base; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); + return *p; +} + +static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].limit; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); + return *p; +} + +static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].ar; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); + return *p; +} + static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; @@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; - rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + rflags = vmcs_readl(GUEST_RFLAGS); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } + to_vmx(vcpu)->rflags = rflags; } - return rflags; + return to_vmx(vcpu)->rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; @@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, } if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) + int inc_eip = 0; + if (kvm_exception_is_soft(nr)) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void) } /* + * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ + * ioctl. In this case the call-back should update internal vmx state to make + * the changes effective. + */ +static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + /* Nothing to do here */ +} + +/* * writes 'offset' into guest's timestamp counter offset register */ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) vmcs_write64(TSC_OFFSET, offset + adjustment); } +static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + return target_tsc - native_read_tsc(); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; #ifdef CONFIG_X86_64 case MSR_FS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_FS_BASE, data); break; case MSR_GS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_GS_BASE, data); break; case MSR_KERNEL_GS_BASE: @@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); @@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } + vmx_segment_cache_clear(vmx); + vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); @@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) { u32 guest_tr_ar; + vmx_segment_cache_clear(to_vmx(vcpu)); + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", @@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_save_segment *save; u32 ar; @@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = save->limit; ar = save->ar; if (seg == VCPU_SREG_TR - || var->selector == vmcs_read16(sf->selector)) + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) goto use_saved_rmode_seg; } - var->base = vmcs_readl(sf->base); - var->limit = vmcs_read32(sf->limit); - var->selector = vmcs_read16(sf->selector); - ar = vmcs_read32(sf->ar_bytes); + var->base = vmx_read_guest_seg_base(vmx, seg); + var->limit = vmx_read_guest_seg_limit(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + ar = vmx_read_guest_seg_ar(vmx, seg); use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; @@ -2098,27 +2205,37 @@ use_saved_rmode_seg: static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_segment s; if (to_vmx(vcpu)->rmode.vm86_active) { vmx_get_segment(vcpu, &s, seg); return s.base; } - return vmcs_readl(sf->base); + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } -static int vmx_get_cpl(struct kvm_vcpu *vcpu) +static int __vmx_get_cpl(struct kvm_vcpu *vcpu) { if (!is_protmode(vcpu)) return 0; - if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + if (!is_long_mode(vcpu) + && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmcs_read16(GUEST_CS_SELECTOR) & 3; + return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); + } + return to_vmx(vcpu)->cpl; +} + + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; + vmx_segment_cache_clear(vmx); + if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { vmcs_write16(sf->selector, var->selector); vmx->rmode.tr.selector = var->selector; @@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { - u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); + u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); *db = (ar >> 14) & 1; *l = (ar >> 13) & 1; @@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (ret != 0) goto out; + vmx_segment_cache_clear(vmx); + seg_setup(VCPU_SREG_CS); /* * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode @@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++vcpu->stat.irq_injections; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) + int inc_eip = 0; + if (vcpu->arch.interrupt.soft) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } ++vcpu->stat.nmi_injections; + vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) + if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) return to_vmx(vcpu)->soft_vnmi_blocked; + if (to_vmx(vcpu)->nmi_known_unmasked) + return false; return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; } @@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmx->vnmi_blocked_time = 0; } } else { + vmx->nmi_known_unmasked = !masked; if (masked) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) enum emulation_result er; vect_info = vmx->idt_vectoring_info; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + intr_info = vmx->exit_intr_info; if (is_machine_check(intr_info)) return handle_machine_check(vcpu); @@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) } error_code = 0; - rip = kvm_rip_read(vcpu); if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { @@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; + rip = kvm_rip_read(vcpu); kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; break; @@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + vmx_set_nmi_mask(vcpu, true); break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; + + if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY + || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) + return; + + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ - if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI - && is_machine_check(exit_intr_info))) + if (is_machine_check(exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ @@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; bool unblock_nmi; u8 vector; bool idtv_info_valid; @@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; if (cpu_has_virtual_nmis()) { + if (vmx->nmi_known_unmasked) + return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) vector != DF_VECTOR && !idtv_info_valid) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + else + vmx->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); } else if (unlikely(vmx->soft_vnmi_blocked)) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); @@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. */ - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + vmx_set_nmi_mask(&vmx->vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = @@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_CPL) | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; @@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); @@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; if (!vmx->guest_msrs) { - err = -ENOMEM; goto uninit_vcpu; } @@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vmcs; if (vm_need_virtualize_apic_accesses(kvm)) - if (alloc_apic_access_page(kvm) != 0) + err = alloc_apic_access_page(kvm); + if (err) goto free_vmcs; if (enable_ept) { @@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { } +static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, + .compute_tsc_offset = vmx_compute_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, + + .check_intercept = vmx_check_intercept, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6b0bf9..77c9d8673dc4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -60,22 +60,12 @@ #include <asm/div64.h> #define MAX_IO_MSRS 256 -#define CR0_RESERVED_BITS \ - (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ - | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ - | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR4_RESERVED_BITS \ - (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ - | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXSAVE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) - -#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) - #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +#define emul_to_vcpu(ctxt) \ + container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) + /* EFER defaults: * - enable syscall per default because its emulated by KVM * - enable LME and LMA per default on 64 bit KVM @@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void) return ret; } -static inline u64 nsec_to_cycles(u64 nsec) +static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.virtual_tsc_khz) + return vcpu->arch.virtual_tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) { u64 ret; @@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec) if (kvm_tsc_changes_freq()) printk_once(KERN_WARNING "kvm: unreliable cycle conversion on adjustable rate TSC\n"); - ret = nsec * __this_cpu_read(cpu_tsc_khz); + ret = nsec * vcpu_tsc_khz(vcpu); do_div(ret, USEC_PER_SEC); return ret; } -static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) +static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, - &kvm->arch.virtual_tsc_shift, - &kvm->arch.virtual_tsc_mult); - kvm->arch.virtual_tsc_khz = this_tsc_khz; + &vcpu->arch.tsc_catchup_shift, + &vcpu->arch.tsc_catchup_mult); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) { u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, - vcpu->kvm->arch.virtual_tsc_mult, - vcpu->kvm->arch.virtual_tsc_shift); + vcpu->arch.tsc_catchup_mult, + vcpu->arch.tsc_catchup_shift); tsc += vcpu->arch.last_tsc_write; return tsc; } @@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) s64 sdiff; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = data - native_read_tsc(); + offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; sdiff = data - kvm->arch.last_tsc_write; @@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * In that case, for a reliable TSC, we can match TSC offsets, * or make a best guest using elapsed value. */ - if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && + if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && elapsed < 5ULL * NSEC_PER_SEC) { if (!check_tsc_unstable()) { offset = kvm->arch.last_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { - u64 delta = nsec_to_cycles(elapsed); + u64 delta = nsec_to_cycles(vcpu, elapsed); offset += delta; pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } @@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); kernel_ns = get_kernel_ns(); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - + this_tsc_khz = vcpu_tsc_khz(v); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { /* Make sure TSC doesn't go backwards */ - s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + s64 tsc_delta; + u64 tsc; + + kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); + tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : + tsc - vcpu->arch.last_guest_tsc; + if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { @@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); } static int is_efer_nx(void) @@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0xC0000001.edx */ + const u32 kvm_supported_word5_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | + F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | + F(PMM) | F(PMM_EN); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); @@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | + (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); entry->ebx = 0; entry->ecx = 0; @@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_supported_word6_x86_features; cpuid_mask(&entry->ecx, 6); break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: + /*Just support up to 0xC0000004 now*/ + entry->eax = min(entry->eax, 0xC0000004); + break; + case 0xC0000001: + entry->edx &= kvm_supported_word5_x86_features; + cpuid_mask(&entry->edx, 5); + break; + case 0xC0000002: + case 0xC0000003: + case 0xC0000004: + /*Now nothing to do, reserved for the future*/ + break; } kvm_x86_ops->set_supported_cpuid(function, entry); @@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (nent >= cpuid->nent) goto out_free; + /* Add support for Centaur's CPUID instruction. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { + do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + limit = cpuid_entries[nent - 1].eax; + for (func = 0xC0000001; + func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + } + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, cpuid->nent); @@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } @@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void) static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + handled += n; + addr += n; + len -= n; + v += n; + } while (len); + + return handled; } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static void kvm_set_segment(struct kvm_vcpu *vcpu, @@ -3703,37 +3805,43 @@ out: } /* used for instruction fetching */ -static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access | PFERR_FETCH_MASK, exception); } -static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } -static int kvm_write_guest_virt_system(gva_t addr, void *val, +static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); void *data = val; int r = X86EMUL_CONTINUE; @@ -3761,13 +3869,15 @@ out: return r; } -static int emulator_read_emulated(unsigned long addr, +static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; + int handled; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); @@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) + if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; @@ -3794,18 +3904,24 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); + handled = vcpu_mmio_read(vcpu, gpa, bytes, val); + + if (handled == bytes) return X86EMUL_CONTINUE; - } + + gpa += handled; + bytes -= handled; + val += handled; trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); @@ -3847,25 +3964,35 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) + handled = vcpu_mmio_write(vcpu, gpa, bytes, val); + if (handled == bytes) return X86EMUL_CONTINUE; + gpa += handled; + bytes -= handled; + val += handled; + vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } -int emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; @@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr, (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) #endif -static int emulator_cmpxchg_emulated(unsigned long addr, +static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; struct page *page; char *kaddr; @@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, emul_write: printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); - return emulator_write_emulated(addr, new, bytes, exception, vcpu); + return emulator_write_emulated(ctxt, addr, new, bytes, exception); } static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) @@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } -static int emulator_pio_in_emulated(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu) +static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + if (vcpu->arch.pio.count) goto data_avail; @@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, return 0; } -static int emulator_pio_out_emulated(int size, unsigned short port, - const void *val, unsigned int count, - struct kvm_vcpu *vcpu) +static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, + const void *val, unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + trace_kvm_pio(1, port, size, count); vcpu->arch.pio.port = port; @@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) return kvm_x86_ops->get_segment_base(vcpu, seg); } -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) +static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) { - kvm_mmu_invlpg(vcpu, address); - return X86EMUL_CONTINUE; + kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) @@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); -int emulate_clts(struct kvm_vcpu *vcpu) +static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - kvm_x86_ops->fpu_activate(vcpu); - return X86EMUL_CONTINUE; + kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); } -int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(vcpu, dr, dest); + return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) { - return __kvm_set_dr(vcpu, dr, value); + return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); } static u64 mk_cr_64(u64 curr_cr, u32 new_val) @@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) return (curr_cr & ~((1ULL << 32) - 1)) | new_val; } -static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); unsigned long value; switch (cr) { @@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) return value; } -static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int res = 0; switch (cr) { @@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) return res; } -static int emulator_get_cpl(struct kvm_vcpu *vcpu) +static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) +{ + return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); +} + +static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); +} + +static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - return kvm_x86_ops->get_cpl(vcpu); + kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_gdt(vcpu, dt); + kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_idt(vcpu, dt); + kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); } -static unsigned long emulator_get_cached_segment_base(int seg, - struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cached_segment_base( + struct x86_emulate_ctxt *ctxt, int seg) { - return get_segment_base(vcpu, seg); + return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu) +static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; - kvm_get_segment(vcpu, &var, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); + *selector = var.selector; if (var.unusable) return false; @@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, return true; } -static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu) +static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, + int seg) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; - /* needed to preserve selector */ - kvm_get_segment(vcpu, &var, seg); - + var.selector = selector; var.base = get_desc_base(desc); #ifdef CONFIG_X86_64 var.base |= ((u64)base3) << 32; @@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, return; } -static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) { - struct kvm_segment kvm_seg; + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} - kvm_get_segment(vcpu, &kvm_seg, seg); - return kvm_seg.selector; +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); } -static void emulator_set_segment_selector(u16 sel, int seg, - struct kvm_vcpu *vcpu) +static void emulator_halt(struct x86_emulate_ctxt *ctxt) { - struct kvm_segment kvm_seg; + emul_to_vcpu(ctxt)->arch.halt_request = 1; +} - kvm_get_segment(vcpu, &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); +static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_disable(); + kvm_load_guest_fpu(emul_to_vcpu(ctxt)); + /* + * CR0.TS may reference the host fpu state, not the guest fpu state, + * so it may be clear at this point. + */ + clts(); +} + +static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_enable(); +} + +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } static struct x86_emulate_ops emulate_ops = { @@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = { .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, - .get_cached_descriptor = emulator_get_cached_descriptor, - .set_cached_descriptor = emulator_set_cached_descriptor, - .get_segment_selector = emulator_get_segment_selector, - .set_segment_selector = emulator_set_segment_selector, + .get_segment = emulator_get_segment, + .set_segment = emulator_set_segment, .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, + .set_gdt = emulator_set_gdt, + .set_idt = emulator_set_idt, .get_cr = emulator_get_cr, .set_cr = emulator_set_cr, .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .set_msr = kvm_set_msr, - .get_msr = kvm_get_msr, + .set_msr = emulator_set_msr, + .get_msr = emulator_get_msr, + .halt = emulator_halt, + .wbinvd = emulator_wbinvd, + .fix_hypercall = emulator_fix_hypercall, + .get_fpu = emulator_get_fpu, + .put_fpu = emulator_put_fpu, + .intercept = emulator_intercept, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int cs_db, cs_l; + /* + * TODO: fix emulate.c to use guest_read/write_register + * instead of direct ->regs accesses, can save hundred cycles + * on Intel for instructions that don't read/change RSP, for + * for example. + */ cache_all_regs(vcpu); kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : @@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ? X86EMUL_MODE_VM86 : cs_l ? X86EMUL_MODE_PROT64 : cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int ret; @@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.decode.op_bytes = 2; vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; - vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + + inc_eip; ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); if (ret != X86EMUL_CONTINUE) @@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.eip = c->eip; memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (irq == NMI_VECTOR) vcpu->arch.nmi_pending = false; @@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + bool writeback = true; kvm_clear_exception_queue(vcpu); - vcpu->arch.mmio_fault_cr2 = cr2; - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); if (!(emulation_type & EMULTYPE_NO_DECODE)) { init_emulate_ctxt(vcpu); @@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } - /* this is needed for vmware backdor interface to work since it + /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ - memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + } restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); + if (r == EMULATION_INTERCEPTED) + return EMULATE_DONE; + if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2)) return EMULATE_DONE; @@ -4462,21 +4642,28 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; + else + writeback = false; r = EMULATE_DO_MMIO; } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + writeback = false; r = EMULATE_DO_MMIO; } else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; - toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); - kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); - kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + if (writeback) { + toggle_interruptibility(vcpu, + vcpu->arch.emulate_ctxt.interruptibility); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + } else + vcpu->arch.emulate_regs_need_sync_to_vcpu = true; return r; } @@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); + int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, + size, port, &val, 1); /* do not return to emulator after return from userspace */ vcpu->arch.pio.count = 0; return ret; @@ -4879,8 +5067,9 @@ out: } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu) +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); char instruction[3]; unsigned long rip = kvm_rip_read(vcpu); @@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); -} - -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_gdt(vcpu, &dt); -} - -void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_idt(vcpu, &dt); + return emulator_write_emulated(&vcpu->arch.emulate_ctxt, + rip, instruction, 3, NULL); } static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) @@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; + bool nmi_pending; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; @@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); if (unlikely(r)) goto out; + /* + * An NMI can be injected between local nmi_pending read and + * vcpu->arch.nmi_pending read inside inject_pending_event(). + * But in that case, KVM_REQ_EVENT will be set, which makes + * the race described above benign. + */ + nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) + if (nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); @@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int complete_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int r; + + if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) + return 1; + + if (vcpu->mmio_needed) { + vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data + vcpu->mmio_index, + run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; + } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - if (vcpu->arch.pio.count || vcpu->mmio_needed) { - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) { - r = 0; - goto out; - } - } + r = complete_mmio(vcpu); + if (r <= 0) + goto out; + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); @@ -5455,6 +5663,18 @@ out: int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { + /* + * We are here if userspace calls get_regs() in the middle of + * instruction emulation. Registers state needs to be copied + * back from emulation context to vcpu. Usrapace shouldn't do + * that usually, but some bad designed PV devices (vmware + * backdoor interface) need this to work + */ + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); @@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); @@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); return EMULATE_DONE; } @@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.pio_data = page_address(page); - if (!kvm->arch.virtual_tsc_khz) - kvm_arch_set_tsc_khz(kvm, max_tsc_khz); + kvm_init_tsc_catchup(vcpu, max_tsc_khz); r = kvm_mmu_create(vcpu); if (r < 0) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da830ce0..e407ed3df817 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -77,7 +77,7 @@ static inline u32 bit(int bitno) void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 16dc3645291c..3e904717c1c0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -777,9 +777,9 @@ static int rbd_do_request(struct request *rq, ops, false, GFP_NOIO, pages, bio); - if (IS_ERR(req)) { + if (!req) { up_read(&header->snap_rwsem); - ret = PTR_ERR(req); + ret = -ENOMEM; goto done_pages; } diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f903d7b6f34a..23d1468ad253 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2199,7 +2199,6 @@ static int ohci_set_config_rom(struct fw_card *card, { struct fw_ohci *ohci; unsigned long flags; - int ret = -EBUSY; __be32 *next_config_rom; dma_addr_t uninitialized_var(next_config_rom_bus); @@ -2240,22 +2239,37 @@ static int ohci_set_config_rom(struct fw_card *card, spin_lock_irqsave(&ohci->lock, flags); + /* + * If there is not an already pending config_rom update, + * push our new allocation into the ohci->next_config_rom + * and then mark the local variable as null so that we + * won't deallocate the new buffer. + * + * OTOH, if there is a pending config_rom update, just + * use that buffer with the new config_rom data, and + * let this routine free the unused DMA allocation. + */ + if (ohci->next_config_rom == NULL) { ohci->next_config_rom = next_config_rom; ohci->next_config_rom_bus = next_config_rom_bus; + next_config_rom = NULL; + } - copy_config_rom(ohci->next_config_rom, config_rom, length); + copy_config_rom(ohci->next_config_rom, config_rom, length); - ohci->next_header = config_rom[0]; - ohci->next_config_rom[0] = 0; + ohci->next_header = config_rom[0]; + ohci->next_config_rom[0] = 0; - reg_write(ohci, OHCI1394_ConfigROMmap, - ohci->next_config_rom_bus); - ret = 0; - } + reg_write(ohci, OHCI1394_ConfigROMmap, ohci->next_config_rom_bus); spin_unlock_irqrestore(&ohci->lock, flags); + /* If we didn't use the DMA allocation, delete it. */ + if (next_config_rom != NULL) + dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, + next_config_rom, next_config_rom_bus); + /* * Now initiate a bus reset to have the changes take * effect. We clean up the old config rom memory and DMA @@ -2263,13 +2277,10 @@ static int ohci_set_config_rom(struct fw_card *card, * controller could need to access it before the bus reset * takes effect. */ - if (ret == 0) - fw_schedule_bus_reset(&ohci->card, true, true); - else - dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, - next_config_rom, next_config_rom_bus); - return ret; + fw_schedule_bus_reset(&ohci->card, true, true); + + return 0; } static void ohci_send_request(struct fw_card *card, struct fw_packet *packet) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 5d00b0fc0d91..959186cbf328 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -431,7 +431,7 @@ EXPORT_SYMBOL(drm_mm_search_free_in_range); void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) { list_replace(&old->node_list, &new->node_list); - list_replace(&old->node_list, &new->hole_stack); + list_replace(&old->hole_stack, &new->hole_stack); new->hole_follows = old->hole_follows; new->mm = old->mm; new->start = old->start; @@ -699,8 +699,8 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) entry->size); total_used += entry->size; if (entry->hole_follows) { - hole_start = drm_mm_hole_node_start(&mm->head_node); - hole_end = drm_mm_hole_node_end(&mm->head_node); + hole_start = drm_mm_hole_node_start(entry); + hole_end = drm_mm_hole_node_end(entry); hole_size = hole_end - hole_start; seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: free\n", hole_start, hole_end, hole_size); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e522c702b04e..373c2a005ec1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5154,8 +5154,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(DSPCNTR(plane), dspcntr); POSTING_READ(DSPCNTR(plane)); - if (!HAS_PCH_SPLIT(dev)) - intel_enable_plane(dev_priv, plane, pipe); ret = intel_pipe_set_base(crtc, x, y, old_fb); @@ -5605,9 +5603,9 @@ static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc) intel_clock_t clock; if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0) - fp = FP0(pipe); + fp = I915_READ(FP0(pipe)); else - fp = FP1(pipe); + fp = I915_READ(FP1(pipe)); clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT; if (IS_PINEVIEW(dev)) { @@ -6579,8 +6577,10 @@ intel_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOENT); intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); - if (!intel_fb) + if (!intel_fb) { + drm_gem_object_unreference_unlocked(&obj->base); return ERR_PTR(-ENOMEM); + } ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); if (ret) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index cb8578b7e443..a4d80314e7f8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1470,7 +1470,8 @@ intel_dp_link_down(struct intel_dp *intel_dp) if (!HAS_PCH_CPT(dev) && I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) { - struct intel_crtc *intel_crtc = to_intel_crtc(intel_dp->base.base.crtc); + struct drm_crtc *crtc = intel_dp->base.base.crtc; + /* Hardware workaround: leaving our transcoder select * set to transcoder B while it's off will prevent the * corresponding HDMI output on transcoder A. @@ -1485,7 +1486,19 @@ intel_dp_link_down(struct intel_dp *intel_dp) /* Changes to enable or select take place the vblank * after being written. */ - intel_wait_for_vblank(dev, intel_crtc->pipe); + if (crtc == NULL) { + /* We can arrive here never having been attached + * to a CRTC, for instance, due to inheriting + * random state from the BIOS. + * + * If the pipe is not running, play safe and + * wait for the clocks to stabilise before + * continuing. + */ + POSTING_READ(intel_dp->output_reg); + msleep(50); + } else + intel_wait_for_vblank(dev, to_intel_crtc(crtc)->pipe); } I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index a562bd2648c7..67cb076d271b 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -539,6 +539,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, struct drm_device *dev = dev_priv->dev; struct drm_connector *connector = dev_priv->int_lvds_connector; + if (dev->switch_power_state != DRM_SWITCH_POWER_ON) + return NOTIFY_OK; + /* * check and update the status of LVDS connector after receiving * the LID nofication event. diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 5045f8b921d6..c3e953b08992 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -152,8 +152,6 @@ nouveau_mem_vram_fini(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; - nouveau_bo_ref(NULL, &dev_priv->vga_ram); - ttm_bo_device_release(&dev_priv->ttm.bdev); nouveau_ttm_global_release(dev_priv); diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index a30adec5beaa..915fbce89595 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -768,6 +768,11 @@ static void nouveau_card_takedown(struct drm_device *dev) engine->mc.takedown(dev); engine->display.late_takedown(dev); + if (dev_priv->vga_ram) { + nouveau_bo_unpin(dev_priv->vga_ram); + nouveau_bo_ref(NULL, &dev_priv->vga_ram); + } + mutex_lock(&dev->struct_mutex); ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT); diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index f116516bfef7..dd881d035f09 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -431,7 +431,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } } - /* Acer laptop (Acer TravelMate 5730G) has an HDMI port + /* Acer laptop (Acer TravelMate 5730/5730G) has an HDMI port * on the laptop and a DVI port on the docking station and * both share the same encoder, hpd pin, and ddc line. * So while the bios table is technically correct, @@ -440,7 +440,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, * with different crtcs which isn't possible on the hardware * side and leaves no crtcs for LVDS or VGA. */ - if ((dev->pdev->device == 0x95c4) && + if (((dev->pdev->device == 0x95c4) || (dev->pdev->device == 0x9591)) && (dev->pdev->subsystem_vendor == 0x1025) && (dev->pdev->subsystem_device == 0x013c)) { if ((*connector_type == DRM_MODE_CONNECTOR_DVII) && diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index ed5dfe58f29c..9d95792bea3e 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -15,6 +15,9 @@ #define ATPX_VERSION 0 #define ATPX_GPU_PWR 2 #define ATPX_MUX_SELECT 3 +#define ATPX_I2C_MUX_SELECT 4 +#define ATPX_SWITCH_START 5 +#define ATPX_SWITCH_END 6 #define ATPX_INTEGRATED 0 #define ATPX_DISCRETE 1 @@ -149,13 +152,35 @@ static int radeon_atpx_switch_mux(acpi_handle handle, int mux_id) return radeon_atpx_execute(handle, ATPX_MUX_SELECT, mux_id); } +static int radeon_atpx_switch_i2c_mux(acpi_handle handle, int mux_id) +{ + return radeon_atpx_execute(handle, ATPX_I2C_MUX_SELECT, mux_id); +} + +static int radeon_atpx_switch_start(acpi_handle handle, int gpu_id) +{ + return radeon_atpx_execute(handle, ATPX_SWITCH_START, gpu_id); +} + +static int radeon_atpx_switch_end(acpi_handle handle, int gpu_id) +{ + return radeon_atpx_execute(handle, ATPX_SWITCH_END, gpu_id); +} static int radeon_atpx_switchto(enum vga_switcheroo_client_id id) { + int gpu_id; + if (id == VGA_SWITCHEROO_IGD) - radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 0); + gpu_id = ATPX_INTEGRATED; else - radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 1); + gpu_id = ATPX_DISCRETE; + + radeon_atpx_switch_start(radeon_atpx_priv.atpx_handle, gpu_id); + radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, gpu_id); + radeon_atpx_switch_i2c_mux(radeon_atpx_priv.atpx_handle, gpu_id); + radeon_atpx_switch_end(radeon_atpx_priv.atpx_handle, gpu_id); + return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index bdf2fa1189ae..3189a7efb2e9 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -167,9 +167,6 @@ int radeon_crtc_cursor_set(struct drm_crtc *crtc, return -EINVAL; } - radeon_crtc->cursor_width = width; - radeon_crtc->cursor_height = height; - obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); if (!obj) { DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id); @@ -180,6 +177,9 @@ int radeon_crtc_cursor_set(struct drm_crtc *crtc, if (ret) goto fail; + radeon_crtc->cursor_width = width; + radeon_crtc->cursor_height = height; + radeon_lock_cursor(crtc, true); /* XXX only 27 bit offset for legacy cursor */ radeon_set_cursor(crtc, obj, gpu_addr); diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig index ccbd39a38c46..c545039287ad 100644 --- a/drivers/media/dvb/dvb-usb/Kconfig +++ b/drivers/media/dvb/dvb-usb/Kconfig @@ -356,6 +356,8 @@ config DVB_USB_LME2510 select DVB_TDA826X if !DVB_FE_CUSTOMISE select DVB_STV0288 if !DVB_FE_CUSTOMISE select DVB_IX2505V if !DVB_FE_CUSTOMISE + select DVB_STV0299 if !DVB_FE_CUSTOMISE + select DVB_PLL if !DVB_FE_CUSTOMISE help Say Y here to support the LME DM04/QQBOX DVB-S USB2.0 . diff --git a/drivers/media/dvb/ngene/ngene-core.c b/drivers/media/dvb/ngene/ngene-core.c index ccc2d1af49d4..6927c726ce35 100644 --- a/drivers/media/dvb/ngene/ngene-core.c +++ b/drivers/media/dvb/ngene/ngene-core.c @@ -1520,6 +1520,7 @@ static int init_channel(struct ngene_channel *chan) if (dev->ci.en && (io & NGENE_IO_TSOUT)) { dvb_ca_en50221_init(adapter, dev->ci.en, 0, 1); set_transfer(chan, 1); + chan->dev->channel[2].DataFormatFlags = DF_SWAP32; set_transfer(&chan->dev->channel[2], 1); dvb_register_device(adapter, &chan->ci_dev, &ngene_dvbdev_ci, (void *) chan, diff --git a/drivers/media/radio/saa7706h.c b/drivers/media/radio/saa7706h.c index 585680ffbfb6..b1193dfc5087 100644 --- a/drivers/media/radio/saa7706h.c +++ b/drivers/media/radio/saa7706h.c @@ -376,7 +376,7 @@ static int __devinit saa7706h_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%02x (%s)\n", client->addr << 1, client->adapter->name); - state = kmalloc(sizeof(struct saa7706h_state), GFP_KERNEL); + state = kzalloc(sizeof(struct saa7706h_state), GFP_KERNEL); if (state == NULL) return -ENOMEM; sd = &state->sd; diff --git a/drivers/media/radio/tef6862.c b/drivers/media/radio/tef6862.c index 7c0d77751f6e..0991e1973678 100644 --- a/drivers/media/radio/tef6862.c +++ b/drivers/media/radio/tef6862.c @@ -176,7 +176,7 @@ static int __devinit tef6862_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%02x (%s)\n", client->addr << 1, client->adapter->name); - state = kmalloc(sizeof(struct tef6862_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tef6862_state), GFP_KERNEL); if (state == NULL) return -ENOMEM; state->freq = TEF6862_LO_FREQ; diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index ebd68edf5b24..8fc0f081b470 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -46,7 +46,7 @@ #define MOD_AUTHOR "Jarod Wilson <[email protected]>" #define MOD_DESC "Driver for SoundGraph iMON MultiMedia IR/Display" #define MOD_NAME "imon" -#define MOD_VERSION "0.9.2" +#define MOD_VERSION "0.9.3" #define DISPLAY_MINOR_BASE 144 #define DEVICE_NAME "lcd%d" @@ -460,8 +460,9 @@ static int display_close(struct inode *inode, struct file *file) } /** - * Sends a packet to the device -- this function must be called - * with ictx->lock held. + * Sends a packet to the device -- this function must be called with + * ictx->lock held, or its unlock/lock sequence while waiting for tx + * to complete can/will lead to a deadlock. */ static int send_packet(struct imon_context *ictx) { @@ -991,12 +992,21 @@ static void imon_touch_display_timeout(unsigned long data) * the iMON remotes, and those used by the Windows MCE remotes (which is * really just RC-6), but only one or the other at a time, as the signals * are decoded onboard the receiver. + * + * This function gets called two different ways, one way is from + * rc_register_device, for initial protocol selection/setup, and the other is + * via a userspace-initiated protocol change request, either by direct sysfs + * prodding or by something like ir-keytable. In the rc_register_device case, + * the imon context lock is already held, but when initiated from userspace, + * it is not, so we must acquire it prior to calling send_packet, which + * requires that the lock is held. */ static int imon_ir_change_protocol(struct rc_dev *rc, u64 rc_type) { int retval; struct imon_context *ictx = rc->priv; struct device *dev = ictx->dev; + bool unlock = false; unsigned char ir_proto_packet[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86 }; @@ -1029,6 +1039,11 @@ static int imon_ir_change_protocol(struct rc_dev *rc, u64 rc_type) memcpy(ictx->usb_tx_buf, &ir_proto_packet, sizeof(ir_proto_packet)); + if (!mutex_is_locked(&ictx->lock)) { + unlock = true; + mutex_lock(&ictx->lock); + } + retval = send_packet(ictx); if (retval) goto out; @@ -1037,6 +1052,9 @@ static int imon_ir_change_protocol(struct rc_dev *rc, u64 rc_type) ictx->pad_mouse = false; out: + if (unlock) + mutex_unlock(&ictx->lock); + return retval; } @@ -2134,6 +2152,7 @@ static struct imon_context *imon_init_intf0(struct usb_interface *intf) goto rdev_setup_failed; } + mutex_unlock(&ictx->lock); return ictx; rdev_setup_failed: @@ -2205,6 +2224,7 @@ static struct imon_context *imon_init_intf1(struct usb_interface *intf, goto urb_submit_failed; } + mutex_unlock(&ictx->lock); return ictx; urb_submit_failed: @@ -2299,6 +2319,8 @@ static int __devinit imon_probe(struct usb_interface *interface, usb_set_intfdata(interface, ictx); if (ifnum == 0) { + mutex_lock(&ictx->lock); + if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); @@ -2309,13 +2331,14 @@ static int __devinit imon_probe(struct usb_interface *interface, if (ictx->display_supported) imon_init_display(ictx, interface); + + mutex_unlock(&ictx->lock); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on " "usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); - mutex_unlock(&ictx->lock); mutex_unlock(&driver_lock); return 0; diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index accaf6c9789a..43908a70bd8b 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -36,6 +36,7 @@ #include <linux/io.h> #include <linux/interrupt.h> #include <linux/sched.h> +#include <linux/delay.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/bitops.h> diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index 044fb7a382d6..0c273ec465c9 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -220,6 +220,8 @@ static struct usb_device_id mceusb_dev_table[] = { { USB_DEVICE(VENDOR_PHILIPS, 0x206c) }, /* Philips/Spinel plus IR transceiver for ASUS */ { USB_DEVICE(VENDOR_PHILIPS, 0x2088) }, + /* Philips IR transceiver (Dell branded) */ + { USB_DEVICE(VENDOR_PHILIPS, 0x2093) }, /* Realtek MCE IR Receiver and card reader */ { USB_DEVICE(VENDOR_REALTEK, 0x0161), .driver_info = MULTIFUNCTION }, diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index f53f9c68d38d..a2706648e365 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -707,7 +707,8 @@ static void ir_close(struct input_dev *idev) { struct rc_dev *rdev = input_get_drvdata(idev); - rdev->close(rdev); + if (rdev) + rdev->close(rdev); } /* class for /sys/class/rc */ @@ -733,6 +734,7 @@ static struct { { RC_TYPE_SONY, "sony" }, { RC_TYPE_RC5_SZ, "rc-5-sz" }, { RC_TYPE_LIRC, "lirc" }, + { RC_TYPE_OTHER, "other" }, }; #define PROTO_NONE "none" diff --git a/drivers/media/video/m52790.c b/drivers/media/video/m52790.c index 5e1c9a81984c..303ffa7df4ac 100644 --- a/drivers/media/video/m52790.c +++ b/drivers/media/video/m52790.c @@ -174,7 +174,7 @@ static int m52790_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); - state = kmalloc(sizeof(struct m52790_state), GFP_KERNEL); + state = kzalloc(sizeof(struct m52790_state), GFP_KERNEL); if (state == NULL) return -ENOMEM; diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c index 5d4cf3b3d435..22fa8202d5ca 100644 --- a/drivers/media/video/tda9840.c +++ b/drivers/media/video/tda9840.c @@ -171,7 +171,7 @@ static int tda9840_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); - sd = kmalloc(sizeof(struct v4l2_subdev), GFP_KERNEL); + sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL); if (sd == NULL) return -ENOMEM; v4l2_i2c_subdev_init(sd, client, &tda9840_ops); diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c index 19621ed523ec..827425c5b866 100644 --- a/drivers/media/video/tea6415c.c +++ b/drivers/media/video/tea6415c.c @@ -152,7 +152,7 @@ static int tea6415c_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); - sd = kmalloc(sizeof(struct v4l2_subdev), GFP_KERNEL); + sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL); if (sd == NULL) return -ENOMEM; v4l2_i2c_subdev_init(sd, client, &tea6415c_ops); diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c index 5ea840401f21..f350b6c24500 100644 --- a/drivers/media/video/tea6420.c +++ b/drivers/media/video/tea6420.c @@ -125,7 +125,7 @@ static int tea6420_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); - sd = kmalloc(sizeof(struct v4l2_subdev), GFP_KERNEL); + sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL); if (sd == NULL) return -ENOMEM; v4l2_i2c_subdev_init(sd, client, &tea6420_ops); diff --git a/drivers/media/video/upd64031a.c b/drivers/media/video/upd64031a.c index f8138c75be8b..1aab96a88203 100644 --- a/drivers/media/video/upd64031a.c +++ b/drivers/media/video/upd64031a.c @@ -230,7 +230,7 @@ static int upd64031a_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); - state = kmalloc(sizeof(struct upd64031a_state), GFP_KERNEL); + state = kzalloc(sizeof(struct upd64031a_state), GFP_KERNEL); if (state == NULL) return -ENOMEM; sd = &state->sd; diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c index 28e0e6b6ca84..9bbe61700fd5 100644 --- a/drivers/media/video/upd64083.c +++ b/drivers/media/video/upd64083.c @@ -202,7 +202,7 @@ static int upd64083_probe(struct i2c_client *client, v4l_info(client, "chip found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); - state = kmalloc(sizeof(struct upd64083_state), GFP_KERNEL); + state = kzalloc(sizeof(struct upd64083_state), GFP_KERNEL); if (state == NULL) return -ENOMEM; sd = &state->sd; diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 53450f433f10..2e165117457b 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -25,7 +25,6 @@ #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/gpio.h> -#include <linux/regulator/consumer.h> #include <plat/usb.h> #define USBHS_DRIVER_NAME "usbhs-omap" @@ -700,8 +699,7 @@ static int usbhs_enable(struct device *dev) dev_dbg(dev, "starting TI HSUSB Controller\n"); if (!pdata) { dev_dbg(dev, "missing platform_data\n"); - ret = -ENODEV; - goto end_enable; + return -ENODEV; } spin_lock_irqsave(&omap->lock, flags); @@ -915,7 +913,8 @@ static int usbhs_enable(struct device *dev) end_count: omap->count++; - goto end_enable; + spin_unlock_irqrestore(&omap->lock, flags); + return 0; err_tll: if (pdata->ehci_data->phy_reset) { @@ -931,8 +930,6 @@ err_tll: clk_disable(omap->usbhost_fs_fck); clk_disable(omap->usbhost_hs_fck); clk_disable(omap->usbhost_ick); - -end_enable: spin_unlock_irqrestore(&omap->lock, flags); return ret; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index dc280bc8eba2..6c884ef1b069 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2536,7 +2536,7 @@ config S6GMAC source "drivers/net/stmmac/Kconfig" config PCH_GBE - tristate "PCH Gigabit Ethernet" + tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE" depends on PCI select MII ---help--- @@ -2548,6 +2548,12 @@ config PCH_GBE to Gigabit Ethernet. This driver enables Gigabit Ethernet function. + This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ + Output Hub), ML7223. + ML7223 IOH is for MP(Media Phone) use. + ML7223 is companion chip for Intel Atom E6xx series. + ML7223 is completely compatible for Intel EG20T PCH. + endif # NETDEV_1000 # diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h index 66823eded7a3..2353eca32593 100644 --- a/drivers/net/benet/be.h +++ b/drivers/net/benet/be.h @@ -213,7 +213,7 @@ struct be_rx_stats { struct be_rx_compl_info { u32 rss_hash; - u16 vid; + u16 vlan_tag; u16 pkt_size; u16 rxq_idx; u16 mac_id; diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c index 1e2d825bb94a..9dc9394fd4ca 100644 --- a/drivers/net/benet/be_cmds.c +++ b/drivers/net/benet/be_cmds.c @@ -132,7 +132,7 @@ static void be_async_grp5_pvid_state_process(struct be_adapter *adapter, struct be_async_event_grp5_pvid_state *evt) { if (evt->enabled) - adapter->pvid = evt->tag; + adapter->pvid = le16_to_cpu(evt->tag); else adapter->pvid = 0; } diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 02a0443d1821..9187fb4e08f1 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1018,7 +1018,8 @@ static void be_rx_compl_process(struct be_adapter *adapter, kfree_skb(skb); return; } - vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, rxcp->vid); + vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, + rxcp->vlan_tag); } else { netif_receive_skb(skb); } @@ -1076,7 +1077,8 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter, if (likely(!rxcp->vlanf)) napi_gro_frags(&eq_obj->napi); else - vlan_gro_frags(&eq_obj->napi, adapter->vlan_grp, rxcp->vid); + vlan_gro_frags(&eq_obj->napi, adapter->vlan_grp, + rxcp->vlan_tag); } static void be_parse_rx_compl_v1(struct be_adapter *adapter, @@ -1102,7 +1104,8 @@ static void be_parse_rx_compl_v1(struct be_adapter *adapter, rxcp->pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, cast_enc, compl); rxcp->vtm = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vtm, compl); - rxcp->vid = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vlan_tag, compl); + rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vlan_tag, + compl); } static void be_parse_rx_compl_v0(struct be_adapter *adapter, @@ -1128,7 +1131,8 @@ static void be_parse_rx_compl_v0(struct be_adapter *adapter, rxcp->pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, cast_enc, compl); rxcp->vtm = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vtm, compl); - rxcp->vid = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vlan_tag, compl); + rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vlan_tag, + compl); } static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo) @@ -1155,9 +1159,11 @@ static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo) rxcp->vlanf = 0; if (!lancer_chip(adapter)) - rxcp->vid = swab16(rxcp->vid); + rxcp->vlan_tag = swab16(rxcp->vlan_tag); - if ((adapter->pvid == rxcp->vid) && !adapter->vlan_tag[rxcp->vid]) + if (((adapter->pvid & VLAN_VID_MASK) == + (rxcp->vlan_tag & VLAN_VID_MASK)) && + !adapter->vlan_tag[rxcp->vlan_tag]) rxcp->vlanf = 0; /* As the compl has been parsed, reset it; we wont touch it again */ diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index a358ea9445a2..f501bba1fc6f 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -346,10 +346,10 @@ static void sja1000_rx(struct net_device *dev) | (priv->read_reg(priv, REG_ID2) >> 5); } + cf->can_dlc = get_can_dlc(fi & 0x0F); if (fi & FI_RTR) { id |= CAN_RTR_FLAG; } else { - cf->can_dlc = get_can_dlc(fi & 0x0F); for (i = 0; i < cf->can_dlc; i++) cf->data[i] = priv->read_reg(priv, dreg++); } diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index b423965a78d1..1b49df6b2470 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -583,7 +583,9 @@ static int slcan_open(struct tty_struct *tty) /* Done. We have linked the TTY line to a channel. */ rtnl_unlock(); tty->receive_room = 65536; /* We don't flow control */ - return sl->dev->base_addr; + + /* TTY layer expects 0 on success */ + return 0; err_free_chan: sl->tty = NULL; diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c index 3e2e734fecb7..f3bbdcef338c 100644 --- a/drivers/net/ehea/ehea_ethtool.c +++ b/drivers/net/ehea/ehea_ethtool.c @@ -55,15 +55,20 @@ static int ehea_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex = -1; } - cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_1000baseT_Full - | SUPPORTED_100baseT_Full | SUPPORTED_100baseT_Half - | SUPPORTED_10baseT_Full | SUPPORTED_10baseT_Half - | SUPPORTED_Autoneg | SUPPORTED_FIBRE); - - cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Autoneg - | ADVERTISED_FIBRE); + if (cmd->speed == SPEED_10000) { + cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); + cmd->port = PORT_FIBRE; + } else { + cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full + | SUPPORTED_100baseT_Half | SUPPORTED_10baseT_Full + | SUPPORTED_10baseT_Half | SUPPORTED_Autoneg + | SUPPORTED_TP); + cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg + | ADVERTISED_TP); + cmd->port = PORT_TP; + } - cmd->port = PORT_FIBRE; cmd->autoneg = port->autoneg == 1 ? AUTONEG_ENABLE : AUTONEG_DISABLE; return 0; diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 2ef2f9cdefa6..56d049a472da 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -34,6 +34,10 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_COPYBREAK_DEFAULT 256 #define PCH_GBE_PCI_BAR 1 +/* Macros for ML7223 */ +#define PCI_VENDOR_ID_ROHM 0x10db +#define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 + #define PCH_GBE_TX_WEIGHT 64 #define PCH_GBE_RX_WEIGHT 64 #define PCH_GBE_RX_BUFFER_WRITE 16 @@ -43,8 +47,7 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_MAC_RGMII_CTRL_SETTING ( \ PCH_GBE_CHIP_TYPE_INTERNAL | \ - PCH_GBE_RGMII_MODE_RGMII | \ - PCH_GBE_CRS_SEL \ + PCH_GBE_RGMII_MODE_RGMII \ ) /* Ethertype field values */ @@ -1494,12 +1497,11 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, /* Write meta date of skb */ skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if ((tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) == - PCH_GBE_RXD_ACC_STAT_TCPIPOK) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { + if (tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) skb->ip_summed = CHECKSUM_NONE; - } + else + skb->ip_summed = CHECKSUM_UNNECESSARY; + napi_gro_receive(&adapter->napi, skb); (*work_done)++; pr_debug("Receive skb->ip_summed: %d length: %d\n", @@ -2420,6 +2422,13 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = { .class = (PCI_CLASS_NETWORK_ETHERNET << 8), .class_mask = (0xFFFF00) }, + {.vendor = PCI_VENDOR_ID_ROHM, + .device = PCI_DEVICE_ID_ROHM_ML7223_GBE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = (PCI_CLASS_NETWORK_ETHERNET << 8), + .class_mask = (0xFFFF00) + }, /* required last entry */ {0} }; diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 86cbb9ea2f26..8ec1a9a0bb9a 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -853,7 +853,9 @@ static int slip_open(struct tty_struct *tty) /* Done. We have linked the TTY line to a channel. */ rtnl_unlock(); tty->receive_room = 65536; /* We don't flow control */ - return sl->dev->base_addr; + + /* TTY layer expects 0 on success */ + return 0; err_free_bufs: sl_free_bufs(sl); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index a301479ecc60..c924ea2bce07 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -567,7 +567,7 @@ static const struct usb_device_id products [] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1004, 0x61aa, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, + .driver_info = (unsigned long)&wwan_info, }, /* diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 7d42f9a2c068..81126ff85e05 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -65,6 +65,7 @@ #define IPHETH_USBINTF_PROTO 1 #define IPHETH_BUF_SIZE 1516 +#define IPHETH_IP_ALIGN 2 /* padding at front of URB */ #define IPHETH_TX_TIMEOUT (5 * HZ) #define IPHETH_INTFNUM 2 @@ -202,18 +203,21 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } - len = urb->actual_length; - buf = urb->transfer_buffer; + if (urb->actual_length <= IPHETH_IP_ALIGN) { + dev->net->stats.rx_length_errors++; + return; + } + len = urb->actual_length - IPHETH_IP_ALIGN; + buf = urb->transfer_buffer + IPHETH_IP_ALIGN; - skb = dev_alloc_skb(NET_IP_ALIGN + len); + skb = dev_alloc_skb(len); if (!skb) { err("%s: dev_alloc_skb: -ENOMEM", __func__); dev->net->stats.rx_dropped++; return; } - skb_reserve(skb, NET_IP_ALIGN); - memcpy(skb_put(skb, len), buf + NET_IP_ALIGN, len - NET_IP_ALIGN); + memcpy(skb_put(skb, len), buf, len); skb->dev = dev->net; skb->protocol = eth_type_trans(skb, dev->net); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 009bba3d753e..9ab439d144ed 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -645,6 +645,7 @@ int usbnet_stop (struct net_device *net) struct driver_info *info = dev->driver_info; int retval; + clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); netif_info(dev, ifdown, dev->net, @@ -1524,9 +1525,12 @@ int usbnet_resume (struct usb_interface *intf) smp_mb(); clear_bit(EVENT_DEV_ASLEEP, &dev->flags); spin_unlock_irq(&dev->txq.lock); - if (!(dev->txq.qlen >= TX_QLEN(dev))) - netif_start_queue(dev->net); - tasklet_schedule (&dev->bh); + + if (test_bit(EVENT_DEV_OPEN, &dev->flags)) { + if (!(dev->txq.qlen >= TX_QLEN(dev))) + netif_start_queue(dev->net); + tasklet_schedule (&dev->bh); + } } return 0; } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 0d47c3a05307..c16ed961153a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -178,6 +178,7 @@ static void vmxnet3_process_events(struct vmxnet3_adapter *adapter) { int i; + unsigned long flags; u32 events = le32_to_cpu(adapter->shared->ecr); if (!events) return; @@ -190,10 +191,10 @@ vmxnet3_process_events(struct vmxnet3_adapter *adapter) /* Check if there is an error on xmit/recv queues */ if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) { - spin_lock(&adapter->cmd_lock); + spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_QUEUE_STATUS); - spin_unlock(&adapter->cmd_lock); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); for (i = 0; i < adapter->num_tx_queues; i++) if (adapter->tqd_start[i].status.stopped) @@ -2733,13 +2734,14 @@ static void vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) { u32 cfg; + unsigned long flags; /* intr settings */ - spin_lock(&adapter->cmd_lock); + spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_CONF_INTR); cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); - spin_unlock(&adapter->cmd_lock); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); adapter->intr.type = cfg & 0x3; adapter->intr.mask_mode = (cfg >> 2) & 0x3; diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 5f2dd386152b..2c1abf63957f 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -585,8 +585,9 @@ static bool eeepc_wlan_rfkill_blocked(struct eeepc_laptop *eeepc) return true; } -static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc) +static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle) { + struct pci_dev *port; struct pci_dev *dev; struct pci_bus *bus; bool blocked = eeepc_wlan_rfkill_blocked(eeepc); @@ -599,9 +600,16 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc) mutex_lock(&eeepc->hotplug_lock); if (eeepc->hotplug_slot) { - bus = pci_find_bus(0, 1); + port = acpi_get_pci_dev(handle); + if (!port) { + pr_warning("Unable to find port\n"); + goto out_unlock; + } + + bus = port->subordinate; + if (!bus) { - pr_warning("Unable to find PCI bus 1?\n"); + pr_warning("Unable to find PCI bus?\n"); goto out_unlock; } @@ -609,6 +617,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc) pr_err("Unable to read PCI config space?\n"); goto out_unlock; } + absent = (l == 0xffffffff); if (blocked != absent) { @@ -647,6 +656,17 @@ out_unlock: mutex_unlock(&eeepc->hotplug_lock); } +static void eeepc_rfkill_hotplug_update(struct eeepc_laptop *eeepc, char *node) +{ + acpi_status status = AE_OK; + acpi_handle handle; + + status = acpi_get_handle(NULL, node, &handle); + + if (ACPI_SUCCESS(status)) + eeepc_rfkill_hotplug(eeepc, handle); +} + static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) { struct eeepc_laptop *eeepc = data; @@ -654,7 +674,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) if (event != ACPI_NOTIFY_BUS_CHECK) return; - eeepc_rfkill_hotplug(eeepc); + eeepc_rfkill_hotplug(eeepc, handle); } static int eeepc_register_rfkill_notifier(struct eeepc_laptop *eeepc, @@ -672,6 +692,11 @@ static int eeepc_register_rfkill_notifier(struct eeepc_laptop *eeepc, eeepc); if (ACPI_FAILURE(status)) pr_warning("Failed to register notify on %s\n", node); + /* + * Refresh pci hotplug in case the rfkill state was + * changed during setup. + */ + eeepc_rfkill_hotplug(eeepc, handle); } else return -ENODEV; @@ -693,6 +718,12 @@ static void eeepc_unregister_rfkill_notifier(struct eeepc_laptop *eeepc, if (ACPI_FAILURE(status)) pr_err("Error removing rfkill notify handler %s\n", node); + /* + * Refresh pci hotplug in case the rfkill + * state was changed after + * eeepc_unregister_rfkill_notifier() + */ + eeepc_rfkill_hotplug(eeepc, handle); } } @@ -816,11 +847,7 @@ static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc) rfkill_destroy(eeepc->wlan_rfkill); eeepc->wlan_rfkill = NULL; } - /* - * Refresh pci hotplug in case the rfkill state was changed after - * eeepc_unregister_rfkill_notifier() - */ - eeepc_rfkill_hotplug(eeepc); + if (eeepc->hotplug_slot) pci_hp_deregister(eeepc->hotplug_slot); @@ -889,11 +916,6 @@ static int eeepc_rfkill_init(struct eeepc_laptop *eeepc) eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P5"); eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P6"); eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P7"); - /* - * Refresh pci hotplug in case the rfkill state was changed during - * setup. - */ - eeepc_rfkill_hotplug(eeepc); exit: if (result && result != -ENODEV) @@ -928,8 +950,11 @@ static int eeepc_hotk_restore(struct device *device) struct eeepc_laptop *eeepc = dev_get_drvdata(device); /* Refresh both wlan rfkill state and pci hotplug */ - if (eeepc->wlan_rfkill) - eeepc_rfkill_hotplug(eeepc); + if (eeepc->wlan_rfkill) { + eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P5"); + eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P6"); + eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P7"); + } if (eeepc->bluetooth_rfkill) rfkill_set_sw_state(eeepc->bluetooth_rfkill, diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 8f709aec4da0..6fe8cd6e23b5 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -934,6 +934,14 @@ static ssize_t sony_nc_sysfs_store(struct device *dev, /* * Backlight device */ +struct sony_backlight_props { + struct backlight_device *dev; + int handle; + u8 offset; + u8 maxlvl; +}; +struct sony_backlight_props sony_bl_props; + static int sony_backlight_update_status(struct backlight_device *bd) { return acpi_callsetfunc(sony_nc_acpi_handle, "SBRT", @@ -954,21 +962,26 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd) { int result; int *handle = (int *)bl_get_data(bd); + struct sony_backlight_props *sdev = + (struct sony_backlight_props *)bl_get_data(bd); - sony_call_snc_handle(*handle, 0x0200, &result); + sony_call_snc_handle(sdev->handle, 0x0200, &result); - return result & 0xff; + return (result & 0xff) - sdev->offset; } static int sony_nc_update_status_ng(struct backlight_device *bd) { int value, result; int *handle = (int *)bl_get_data(bd); + struct sony_backlight_props *sdev = + (struct sony_backlight_props *)bl_get_data(bd); - value = bd->props.brightness; - sony_call_snc_handle(*handle, 0x0100 | (value << 16), &result); + value = bd->props.brightness + sdev->offset; + if (sony_call_snc_handle(sdev->handle, 0x0100 | (value << 16), &result)) + return -EIO; - return sony_nc_get_brightness_ng(bd); + return value; } static const struct backlight_ops sony_backlight_ops = { @@ -981,8 +994,6 @@ static const struct backlight_ops sony_backlight_ng_ops = { .update_status = sony_nc_update_status_ng, .get_brightness = sony_nc_get_brightness_ng, }; -static int backlight_ng_handle; -static struct backlight_device *sony_backlight_device; /* * New SNC-only Vaios event mapping to driver known keys @@ -1549,6 +1560,75 @@ static void sony_nc_kbd_backlight_resume(void) &ignore); } +static void sony_nc_backlight_ng_read_limits(int handle, + struct sony_backlight_props *props) +{ + int offset; + acpi_status status; + u8 brlvl, i; + u8 min = 0xff, max = 0x00; + struct acpi_object_list params; + union acpi_object in_obj; + union acpi_object *lvl_enum; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + props->handle = handle; + props->offset = 0; + props->maxlvl = 0xff; + + offset = sony_find_snc_handle(handle); + if (offset < 0) + return; + + /* try to read the boundaries from ACPI tables, if we fail the above + * defaults should be reasonable + */ + params.count = 1; + params.pointer = &in_obj; + in_obj.type = ACPI_TYPE_INTEGER; + in_obj.integer.value = offset; + status = acpi_evaluate_object(sony_nc_acpi_handle, "SN06", ¶ms, + &buffer); + if (ACPI_FAILURE(status)) + return; + + lvl_enum = (union acpi_object *) buffer.pointer; + if (!lvl_enum) { + pr_err("No SN06 return object."); + return; + } + if (lvl_enum->type != ACPI_TYPE_BUFFER) { + pr_err("Invalid SN06 return object 0x%.2x\n", + lvl_enum->type); + goto out_invalid; + } + + /* the buffer lists brightness levels available, brightness levels are + * from 0 to 8 in the array, other values are used by ALS control. + */ + for (i = 0; i < 9 && i < lvl_enum->buffer.length; i++) { + + brlvl = *(lvl_enum->buffer.pointer + i); + dprintk("Brightness level: %d\n", brlvl); + + if (!brlvl) + break; + + if (brlvl > max) + max = brlvl; + if (brlvl < min) + min = brlvl; + } + props->offset = min; + props->maxlvl = max; + dprintk("Brightness levels: min=%d max=%d\n", props->offset, + props->maxlvl); + +out_invalid: + kfree(buffer.pointer); + return; +} + static void sony_nc_backlight_setup(void) { acpi_handle unused; @@ -1557,14 +1637,14 @@ static void sony_nc_backlight_setup(void) struct backlight_properties props; if (sony_find_snc_handle(0x12f) != -1) { - backlight_ng_handle = 0x12f; ops = &sony_backlight_ng_ops; - max_brightness = 0xff; + sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; } else if (sony_find_snc_handle(0x137) != -1) { - backlight_ng_handle = 0x137; ops = &sony_backlight_ng_ops; - max_brightness = 0xff; + sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; } else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT", &unused))) { @@ -1577,22 +1657,22 @@ static void sony_nc_backlight_setup(void) memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_PLATFORM; props.max_brightness = max_brightness; - sony_backlight_device = backlight_device_register("sony", NULL, - &backlight_ng_handle, - ops, &props); + sony_bl_props.dev = backlight_device_register("sony", NULL, + &sony_bl_props, + ops, &props); - if (IS_ERR(sony_backlight_device)) { - pr_warning(DRV_PFX "unable to register backlight device\n"); - sony_backlight_device = NULL; + if (IS_ERR(sony_bl_props.dev)) { + pr_warn(DRV_PFX "unable to register backlight device\n"); + sony_bl_props.dev = NULL; } else - sony_backlight_device->props.brightness = - ops->get_brightness(sony_backlight_device); + sony_bl_props.dev->props.brightness = + ops->get_brightness(sony_bl_props.dev); } static void sony_nc_backlight_cleanup(void) { - if (sony_backlight_device) - backlight_device_unregister(sony_backlight_device); + if (sony_bl_props.dev) + backlight_device_unregister(sony_bl_props.dev); } static int sony_nc_add(struct acpi_device *device) @@ -2590,7 +2670,7 @@ static long sonypi_misc_ioctl(struct file *fp, unsigned int cmd, mutex_lock(&spic_dev.lock); switch (cmd) { case SONYPI_IOCGBRT: - if (sony_backlight_device == NULL) { + if (sony_bl_props.dev == NULL) { ret = -EIO; break; } @@ -2603,7 +2683,7 @@ static long sonypi_misc_ioctl(struct file *fp, unsigned int cmd, ret = -EFAULT; break; case SONYPI_IOCSBRT: - if (sony_backlight_device == NULL) { + if (sony_bl_props.dev == NULL) { ret = -EIO; break; } @@ -2617,8 +2697,8 @@ static long sonypi_misc_ioctl(struct file *fp, unsigned int cmd, break; } /* sync the backlight device status */ - sony_backlight_device->props.brightness = - sony_backlight_get_brightness(sony_backlight_device); + sony_bl_props.dev->props.brightness = + sony_backlight_get_brightness(sony_bl_props.dev); break; case SONYPI_IOCGBAT1CAP: if (ec_read16(SONYPI_BAT1_FULL, &val16)) { diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index efb3b6b9bcdb..562fcf0dd2b5 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -128,7 +128,8 @@ enum { }; /* ACPI HIDs */ -#define TPACPI_ACPI_HKEY_HID "IBM0068" +#define TPACPI_ACPI_IBM_HKEY_HID "IBM0068" +#define TPACPI_ACPI_LENOVO_HKEY_HID "LEN0068" #define TPACPI_ACPI_EC_HID "PNP0C09" /* Input IDs */ @@ -3879,7 +3880,8 @@ errexit: } static const struct acpi_device_id ibm_htk_device_ids[] = { - {TPACPI_ACPI_HKEY_HID, 0}, + {TPACPI_ACPI_IBM_HKEY_HID, 0}, + {TPACPI_ACPI_LENOVO_HKEY_HID, 0}, {"", 0}, }; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e9901b8f8443..0bac91e72370 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -400,10 +400,15 @@ static inline int scsi_host_is_busy(struct Scsi_Host *shost) static void scsi_run_queue(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; - struct Scsi_Host *shost = sdev->host; + struct Scsi_Host *shost; LIST_HEAD(starved_list); unsigned long flags; + /* if the device is dead, sdev will be NULL, so no queue to run */ + if (!sdev) + return; + + shost = sdev->host; if (scsi_target(sdev)->single_lun) scsi_single_lun_run(sdev); diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c index eeb7dd43f9a8..830822f86e41 100644 --- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c +++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c @@ -2288,7 +2288,3 @@ err_dev: free_netdev(dev); return NULL; } - -EXPORT_SYMBOL(init_ft1000_card); -EXPORT_SYMBOL(stop_ft1000_card); -EXPORT_SYMBOL(flarion_ft1000_cnt); diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c index 935608e72007..bdfb1aec58df 100644 --- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c +++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c @@ -214,6 +214,3 @@ void ft1000CleanupProc(struct net_device *dev) remove_proc_entry(FT1000_PROC, init_net.proc_net); unregister_netdevice_notifier(&ft1000_netdev_notifier); } - -EXPORT_SYMBOL(ft1000InitProc); -EXPORT_SYMBOL(ft1000CleanupProc); diff --git a/drivers/staging/gma500/Kconfig b/drivers/staging/gma500/Kconfig index 5501eb9b3355..ce8bedaeaac2 100644 --- a/drivers/staging/gma500/Kconfig +++ b/drivers/staging/gma500/Kconfig @@ -1,6 +1,6 @@ config DRM_PSB tristate "Intel GMA500 KMS Framebuffer" - depends on DRM && PCI + depends on DRM && PCI && X86 select FB_CFB_COPYAREA select FB_CFB_FILLRECT select FB_CFB_IMAGEBLIT diff --git a/drivers/staging/intel_sst/intelmid_v1_control.c b/drivers/staging/intel_sst/intelmid_v1_control.c index 9cc15c1c18d4..1ea814218059 100644 --- a/drivers/staging/intel_sst/intelmid_v1_control.c +++ b/drivers/staging/intel_sst/intelmid_v1_control.c @@ -28,6 +28,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/pci.h> +#include <linux/delay.h> #include <linux/file.h> #include <asm/mrst.h> #include <sound/pcm.h> diff --git a/drivers/staging/intel_sst/intelmid_v2_control.c b/drivers/staging/intel_sst/intelmid_v2_control.c index 26d815a67eb8..3c6b3abff3c3 100644 --- a/drivers/staging/intel_sst/intelmid_v2_control.c +++ b/drivers/staging/intel_sst/intelmid_v2_control.c @@ -29,6 +29,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/pci.h> +#include <linux/delay.h> #include <linux/file.h> #include "intel_sst.h" #include "intelmid_snd_control.h" diff --git a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c index b5d21f6497f9..22c04eabed41 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c +++ b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c @@ -12,6 +12,7 @@ */ #include <linux/cs5535.h> #include <linux/gpio.h> +#include <linux/delay.h> #include <asm/olpc.h> #include "olpc_dcon.h" diff --git a/drivers/staging/rts_pstor/debug.h b/drivers/staging/rts_pstor/debug.h index e1408b0e7ae4..ab305be96fb5 100644 --- a/drivers/staging/rts_pstor/debug.h +++ b/drivers/staging/rts_pstor/debug.h @@ -28,7 +28,7 @@ #define RTSX_STOR "rts_pstor: " -#if CONFIG_RTS_PSTOR_DEBUG +#ifdef CONFIG_RTS_PSTOR_DEBUG #define RTSX_DEBUGP(x...) printk(KERN_DEBUG RTSX_STOR x) #define RTSX_DEBUGPN(x...) printk(KERN_DEBUG x) #define RTSX_DEBUGPX(x...) printk(x) diff --git a/drivers/staging/rts_pstor/ms.c b/drivers/staging/rts_pstor/ms.c index 810e170894f5..d89795c6a3ac 100644 --- a/drivers/staging/rts_pstor/ms.c +++ b/drivers/staging/rts_pstor/ms.c @@ -23,6 +23,7 @@ #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/sched.h> +#include <linux/vmalloc.h> #include "rtsx.h" #include "rtsx_transport.h" diff --git a/drivers/staging/rts_pstor/rtsx_chip.c b/drivers/staging/rts_pstor/rtsx_chip.c index d2f1c715a684..4e60780ea804 100644 --- a/drivers/staging/rts_pstor/rtsx_chip.c +++ b/drivers/staging/rts_pstor/rtsx_chip.c @@ -24,6 +24,7 @@ #include <linux/kthread.h> #include <linux/sched.h> #include <linux/workqueue.h> +#include <linux/vmalloc.h> #include "rtsx.h" #include "rtsx_transport.h" @@ -1311,11 +1312,11 @@ void rtsx_polling_func(struct rtsx_chip *chip) #ifdef SUPPORT_OCP if (CHECK_LUN_MODE(chip, SD_MS_2LUN)) { - #if CONFIG_RTS_PSTOR_DEBUG +#ifdef CONFIG_RTS_PSTOR_DEBUG if (chip->ocp_stat & (SD_OC_NOW | SD_OC_EVER | MS_OC_NOW | MS_OC_EVER)) { RTSX_DEBUGP("Over current, OCPSTAT is 0x%x\n", chip->ocp_stat); } - #endif +#endif if (chip->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { if (chip->card_exist & SD_CARD) { diff --git a/drivers/staging/rts_pstor/rtsx_scsi.c b/drivers/staging/rts_pstor/rtsx_scsi.c index 20c2464a20f9..7de1fae443fc 100644 --- a/drivers/staging/rts_pstor/rtsx_scsi.c +++ b/drivers/staging/rts_pstor/rtsx_scsi.c @@ -23,6 +23,7 @@ #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/sched.h> +#include <linux/vmalloc.h> #include "rtsx.h" #include "rtsx_transport.h" diff --git a/drivers/staging/rts_pstor/sd.c b/drivers/staging/rts_pstor/sd.c index 8d066bd428c4..b1277a6c7a8b 100644 --- a/drivers/staging/rts_pstor/sd.c +++ b/drivers/staging/rts_pstor/sd.c @@ -909,7 +909,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir) RTSX_WRITE_REG(chip, SD_VPCLK0_CTL, PHASE_NOT_RESET, PHASE_NOT_RESET); RTSX_WRITE_REG(chip, CLK_CTL, CHANGE_CLK, 0); } else { -#if CONFIG_RTS_PSTOR_DEBUG +#ifdef CONFIG_RTS_PSTOR_DEBUG rtsx_read_register(chip, SD_VP_CTL, &val); RTSX_DEBUGP("SD_VP_CTL: 0x%x\n", val); rtsx_read_register(chip, SD_DCMPS_CTL, &val); @@ -958,7 +958,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir) return STATUS_SUCCESS; Fail: -#if CONFIG_RTS_PSTOR_DEBUG +#ifdef CONFIG_RTS_PSTOR_DEBUG rtsx_read_register(chip, SD_VP_CTL, &val); RTSX_DEBUGP("SD_VP_CTL: 0x%x\n", val); rtsx_read_register(chip, SD_DCMPS_CTL, &val); diff --git a/drivers/staging/rts_pstor/trace.h b/drivers/staging/rts_pstor/trace.h index 2c668bae6ff4..bc83b49a4eb4 100644 --- a/drivers/staging/rts_pstor/trace.h +++ b/drivers/staging/rts_pstor/trace.h @@ -82,7 +82,7 @@ do { \ #define TRACE_GOTO(chip, label) goto label #endif -#if CONFIG_RTS_PSTOR_DEBUG +#ifdef CONFIG_RTS_PSTOR_DEBUG static inline void rtsx_dump(u8 *buf, int buf_len) { int i; diff --git a/drivers/staging/rts_pstor/xd.c b/drivers/staging/rts_pstor/xd.c index 7bcd468b8f2c..9f3add1e8f59 100644 --- a/drivers/staging/rts_pstor/xd.c +++ b/drivers/staging/rts_pstor/xd.c @@ -23,6 +23,7 @@ #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/sched.h> +#include <linux/vmalloc.h> #include "rtsx.h" #include "rtsx_transport.h" diff --git a/drivers/staging/solo6x10/Kconfig b/drivers/staging/solo6x10/Kconfig index 2cf77c940860..03dcac4ea4d0 100644 --- a/drivers/staging/solo6x10/Kconfig +++ b/drivers/staging/solo6x10/Kconfig @@ -2,6 +2,7 @@ config SOLO6X10 tristate "Softlogic 6x10 MPEG codec cards" depends on PCI && VIDEO_DEV && SND && I2C select VIDEOBUF_DMA_SG + select SND_PCM ---help--- This driver supports the Softlogic based MPEG-4 and h.264 codec codec cards. diff --git a/drivers/staging/usbip/vhci_hcd.c b/drivers/staging/usbip/vhci_hcd.c index 0f02a4b12ae4..4f4f13321f40 100644 --- a/drivers/staging/usbip/vhci_hcd.c +++ b/drivers/staging/usbip/vhci_hcd.c @@ -876,8 +876,10 @@ static void vhci_shutdown_connection(struct usbip_device *ud) } /* kill threads related to this sdev, if v.c. exists */ - kthread_stop(vdev->ud.tcp_rx); - kthread_stop(vdev->ud.tcp_tx); + if (vdev->ud.tcp_rx) + kthread_stop(vdev->ud.tcp_rx); + if (vdev->ud.tcp_tx) + kthread_stop(vdev->ud.tcp_tx); usbip_uinfo("stop threads\n"); @@ -949,9 +951,6 @@ static void vhci_device_init(struct vhci_device *vdev) { memset(vdev, 0, sizeof(*vdev)); - vdev->ud.tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx"); - vdev->ud.tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx"); - vdev->ud.side = USBIP_VHCI; vdev->ud.status = VDEV_ST_NULL; /* vdev->ud.lock = SPIN_LOCK_UNLOCKED; */ @@ -1139,7 +1138,7 @@ static int vhci_hcd_probe(struct platform_device *pdev) usbip_uerr("create hcd failed\n"); return -ENOMEM; } - + hcd->has_tt = 1; /* this is private data for vhci_hcd */ the_controller = hcd_to_vhci(hcd); diff --git a/drivers/staging/usbip/vhci_sysfs.c b/drivers/staging/usbip/vhci_sysfs.c index 3f2459f30415..e2dadbd5ef1e 100644 --- a/drivers/staging/usbip/vhci_sysfs.c +++ b/drivers/staging/usbip/vhci_sysfs.c @@ -21,6 +21,7 @@ #include "vhci.h" #include <linux/in.h> +#include <linux/kthread.h> /* TODO: refine locking ?*/ @@ -220,13 +221,13 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr, vdev->ud.tcp_socket = socket; vdev->ud.status = VDEV_ST_NOTASSIGNED; - wake_up_process(vdev->ud.tcp_rx); - wake_up_process(vdev->ud.tcp_tx); - spin_unlock(&vdev->ud.lock); spin_unlock(&the_controller->lock); /* end the lock */ + vdev->ud.tcp_rx = kthread_run(vhci_rx_loop, &vdev->ud, "vhci_rx"); + vdev->ud.tcp_tx = kthread_run(vhci_tx_loop, &vdev->ud, "vhci_tx"); + rh_port_connect(rhport, speed); return count; diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 6a71f52c59b1..76378397b763 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -273,7 +273,7 @@ exit: } int prism2_set_default_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_index) + u8 key_index, bool unicast, bool multicast) { wlandevice_t *wlandev = dev->ml_priv; diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 7e41a95c5ceb..627f3a678759 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -40,6 +40,7 @@ #include <linux/slab.h> #include <linux/usb/ulpi.h> #include <plat/usb.h> +#include <linux/regulator/consumer.h> /* EHCI Register Set */ #define EHCI_INSNREG04 (0xA0) @@ -118,6 +119,8 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) struct ehci_hcd *omap_ehci; int ret = -ENODEV; int irq; + int i; + char supply[7]; if (usb_disabled()) return -ENODEV; @@ -158,6 +161,23 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) hcd->rsrc_len = resource_size(res); hcd->regs = regs; + /* get ehci regulator and enable */ + for (i = 0 ; i < OMAP3_HS_USB_PORTS ; i++) { + if (pdata->port_mode[i] != OMAP_EHCI_PORT_MODE_PHY) { + pdata->regulator[i] = NULL; + continue; + } + snprintf(supply, sizeof(supply), "hsusb%d", i); + pdata->regulator[i] = regulator_get(dev, supply); + if (IS_ERR(pdata->regulator[i])) { + pdata->regulator[i] = NULL; + dev_dbg(dev, + "failed to get ehci port%d regulator\n", i); + } else { + regulator_enable(pdata->regulator[i]); + } + } + ret = omap_usbhs_enable(dev); if (ret) { dev_err(dev, "failed to start usbhs with err %d\n", ret); diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c index 795345ad45e6..7b2e69aa2e98 100644 --- a/drivers/usb/host/isp1760-hcd.c +++ b/drivers/usb/host/isp1760-hcd.c @@ -1633,6 +1633,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) ints[i].qh = NULL; ints[i].qtd = NULL; + urb->status = status; isp1760_urb_done(hcd, urb); if (qtd) pe(hcd, qh, qtd); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index a78f2ebd11b7..73f75d26436c 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -777,7 +777,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd) if (t1 != t2) xhci_writel(xhci, t2, port_array[port_index]); - if (DEV_HIGHSPEED(t1)) { + if (hcd->speed != HCD_USB3) { /* enable remote wake up for USB 2.0 */ u32 __iomem *addr; u32 tmp; @@ -866,6 +866,21 @@ int xhci_bus_resume(struct usb_hcd *hcd) temp |= PORT_LINK_STROBE | XDEV_U0; xhci_writel(xhci, temp, port_array[port_index]); } + /* wait for the port to enter U0 and report port link + * state change. + */ + spin_unlock_irqrestore(&xhci->lock, flags); + msleep(20); + spin_lock_irqsave(&xhci->lock, flags); + + /* Clear PLC */ + temp = xhci_readl(xhci, port_array[port_index]); + if (temp & PORT_PLC) { + temp = xhci_port_state_to_neutral(temp); + temp |= PORT_PLC; + xhci_writel(xhci, temp, port_array[port_index]); + } + slot_id = xhci_find_slot_id_by_port(hcd, xhci, port_index + 1); if (slot_id) @@ -873,7 +888,7 @@ int xhci_bus_resume(struct usb_hcd *hcd) } else xhci_writel(xhci, temp, port_array[port_index]); - if (DEV_HIGHSPEED(temp)) { + if (hcd->speed != HCD_USB3) { /* disable remote wake up for USB 2.0 */ u32 __iomem *addr; u32 tmp; diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6dfbf9ffd7a6..f47c20197c61 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1887,11 +1887,9 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver, otg_set_vbus(musb->xceiv, 1); hcd->self.uses_pio_for_control = 1; - - if (musb->xceiv->last_event == USB_EVENT_NONE) - pm_runtime_put(musb->controller); - } + if (musb->xceiv->last_event == USB_EVENT_NONE) + pm_runtime_put(musb->controller); return 0; diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 57a27fa954b4..e9e60b6e0583 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -270,7 +270,7 @@ static int musb_otg_notifications(struct notifier_block *nb, DBG(4, "VBUS Disconnect\n"); #ifdef CONFIG_USB_GADGET_MUSB_HDRC - if (is_otg_enabled(musb)) + if (is_otg_enabled(musb) || is_peripheral_enabled(musb)) if (musb->gadget_driver) #endif { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e159c529fd2b..38b8ab554924 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -775,6 +775,13 @@ get_more_pages: ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, true, 1, 0); + + if (!req) { + rc = -ENOMEM; + unlock_page(page); + break; + } + max_pages = req->r_num_pages; alloc_page_vec(fsc, req); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5323c330bbf3..9fa08662a88d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1331,10 +1331,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) } /* - * Mark caps dirty. If inode is newly dirty, add to the global dirty - * list. + * Mark caps dirty. If inode is newly dirty, return the dirty flags. + * Caller is then responsible for calling __mark_inode_dirty with the + * returned flags value. */ -void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) +int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) { struct ceph_mds_client *mdsc = ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; @@ -1357,7 +1358,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) list_add(&ci->i_dirty_item, &mdsc->cap_dirty); spin_unlock(&mdsc->cap_dirty_lock); if (ci->i_flushing_caps == 0) { - igrab(inode); + ihold(inode); dirty |= I_DIRTY_SYNC; } } @@ -1365,9 +1366,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && (mask & CEPH_CAP_FILE_BUFFER)) dirty |= I_DIRTY_DATASYNC; - if (dirty) - __mark_inode_dirty(inode, dirty); __cap_delay_requeue(mdsc, ci); + return dirty; } /* @@ -1991,7 +1991,7 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) ci->i_wr_ref++; if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wrbuffer_ref == 0) - igrab(&ci->vfs_inode); + ihold(&ci->vfs_inode); ci->i_wrbuffer_ref++; dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n", &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 159b512d5a27..203252d88d9f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -734,9 +734,12 @@ retry_snap: } } if (ret >= 0) { + int dirty; spin_lock(&inode->i_lock); - __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&inode->i_lock); + if (dirty) + __mark_inode_dirty(inode, dirty); } out: diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index b54c97da1c43..03d6dafda61f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1567,6 +1567,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) int release = 0, dirtied = 0; int mask = 0; int err = 0; + int inode_dirty_flags = 0; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1725,13 +1726,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) dout("setattr %p ATTR_FILE ... hrm!\n", inode); if (dirtied) { - __ceph_mark_dirty_caps(ci, dirtied); + inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); inode->i_ctime = CURRENT_TIME; } release &= issued; spin_unlock(&inode->i_lock); + if (inode_dirty_flags) + __mark_inode_dirty(inode, inode_dirty_flags); + if (mask) { req->r_inode = igrab(inode); req->r_inode_drop = release; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 619fe719968f..b1f1b8bb1271 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -506,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) { return ci->i_dirty_caps | ci->i_flushing_caps; } -extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); +extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); extern int __ceph_caps_used(struct ceph_inode_info *ci); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 8c9eba6ef9df..f2b628696180 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name, struct ceph_inode_xattr *xattr = NULL; int issued; int required_blob_size; + int dirty; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -763,11 +764,12 @@ retry: dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); err = __set_xattr(ci, newname, name_len, newval, val_len, 1, 1, 1, &xattr); - __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&inode->i_lock); - + if (dirty) + __mark_inode_dirty(inode, dirty); return err; do_sync: @@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); int issued; int err; + int dirty; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name) goto do_sync; err = __remove_xattr_by_name(ceph_inode(inode), name); - __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&inode->i_lock); - + if (dirty) + __mark_inode_dirty(inode, dirty); return err; do_sync: spin_unlock(&inode->i_lock); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4bc862a80efa..05f1dcf7d79a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -274,7 +274,8 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) char *data_area_of_target; char *data_area_of_buf2; int remaining; - __u16 byte_count, total_data_size, total_in_buf, total_in_buf2; + unsigned int byte_count, total_in_buf; + __u16 total_data_size, total_in_buf2; total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); @@ -287,7 +288,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) remaining = total_data_size - total_in_buf; if (remaining < 0) - return -EINVAL; + return -EPROTO; if (remaining == 0) /* nothing to do, ignore */ return 0; @@ -308,20 +309,29 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) data_area_of_target += total_in_buf; /* copy second buffer into end of first buffer */ - memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); total_in_buf += total_in_buf2; + /* is the result too big for the field? */ + if (total_in_buf > USHRT_MAX) + return -EPROTO; put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); + + /* fix up the BCC */ byte_count = get_bcc_le(pTargetSMB); byte_count += total_in_buf2; + /* is the result too big for the field? */ + if (byte_count > USHRT_MAX) + return -EPROTO; put_bcc_le(byte_count, pTargetSMB); byte_count = pTargetSMB->smb_buf_length; byte_count += total_in_buf2; - - /* BB also add check that we are not beyond maximum buffer size */ - + /* don't allow buffer to overflow */ + if (byte_count > CIFSMaxBufSize) + return -ENOBUFS; pTargetSMB->smb_buf_length = byte_count; + memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); + if (remaining == total_in_buf2) { cFYI(1, "found the last secondary response"); return 0; /* we are done */ @@ -607,59 +617,63 @@ incomplete_rcv: list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if ((mid_entry->mid == smb_buffer->Mid) && - (mid_entry->midState == MID_REQUEST_SUBMITTED) && - (mid_entry->command == smb_buffer->Command)) { - if (length == 0 && - check2ndT2(smb_buffer, server->maxBuf) > 0) { - /* We have a multipart transact2 resp */ - isMultiRsp = true; - if (mid_entry->resp_buf) { - /* merge response - fix up 1st*/ - if (coalesce_t2(smb_buffer, - mid_entry->resp_buf)) { - mid_entry->multiRsp = - true; - break; - } else { - /* all parts received */ - mid_entry->multiEnd = - true; - goto multi_t2_fnd; - } + if (mid_entry->mid != smb_buffer->Mid || + mid_entry->midState != MID_REQUEST_SUBMITTED || + mid_entry->command != smb_buffer->Command) { + mid_entry = NULL; + continue; + } + + if (length == 0 && + check2ndT2(smb_buffer, server->maxBuf) > 0) { + /* We have a multipart transact2 resp */ + isMultiRsp = true; + if (mid_entry->resp_buf) { + /* merge response - fix up 1st*/ + length = coalesce_t2(smb_buffer, + mid_entry->resp_buf); + if (length > 0) { + length = 0; + mid_entry->multiRsp = true; + break; } else { - if (!isLargeBuf) { - cERROR(1, "1st trans2 resp needs bigbuf"); - /* BB maybe we can fix this up, switch - to already allocated large buffer? */ - } else { - /* Have first buffer */ - mid_entry->resp_buf = - smb_buffer; - mid_entry->largeBuf = - true; - bigbuf = NULL; - } + /* all parts received or + * packet is malformed + */ + mid_entry->multiEnd = true; + goto multi_t2_fnd; + } + } else { + if (!isLargeBuf) { + /* + * FIXME: switch to already + * allocated largebuf? + */ + cERROR(1, "1st trans2 resp " + "needs bigbuf"); + } else { + /* Have first buffer */ + mid_entry->resp_buf = + smb_buffer; + mid_entry->largeBuf = true; + bigbuf = NULL; } - break; } - mid_entry->resp_buf = smb_buffer; - mid_entry->largeBuf = isLargeBuf; + break; + } + mid_entry->resp_buf = smb_buffer; + mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: - if (length == 0) - mid_entry->midState = - MID_RESPONSE_RECEIVED; - else - mid_entry->midState = - MID_RESPONSE_MALFORMED; + if (length == 0) + mid_entry->midState = MID_RESPONSE_RECEIVED; + else + mid_entry->midState = MID_RESPONSE_MALFORMED; #ifdef CONFIG_CIFS_STATS2 - mid_entry->when_received = jiffies; + mid_entry->when_received = jiffies; #endif - list_del_init(&mid_entry->qhead); - mid_entry->callback(mid_entry); - break; - } - mid_entry = NULL; + list_del_init(&mid_entry->qhead); + mid_entry->callback(mid_entry); + break; } spin_unlock(&GlobalMid_Lock); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f6728eb6f4b9..645114ad0a10 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -276,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, } static void -decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, +decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, const struct nls_table *nls_cp) { int len; @@ -284,19 +284,6 @@ decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, cFYI(1, "bleft %d", bleft); - /* - * Windows servers do not always double null terminate their final - * Unicode string. Check to see if there are an uneven number of bytes - * left. If so, then add an extra NULL pad byte to the end of the - * response. - * - * See section 2.7.2 in "Implementing CIFS" for details - */ - if (bleft % 2) { - data[bleft] = 0; - ++bleft; - } - kfree(ses->serverOS); ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); cFYI(1, "serverOS=%s", ses->serverOS); @@ -929,7 +916,9 @@ ssetup_ntlmssp_authenticate: } /* BB check if Unicode and decode strings */ - if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { /* unicode string area must be word-aligned */ if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { ++bcc_ptr; diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig index 0c39dc3ef7d7..56bd15c5bf6c 100644 --- a/fs/hpfs/Kconfig +++ b/fs/hpfs/Kconfig @@ -1,7 +1,6 @@ config HPFS_FS tristate "OS/2 HPFS file system support" depends on BLOCK - depends on BROKEN || !PREEMPT help OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS is the file system used for organizing files on OS/2 hard disk diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 5503e2c28910..7a5eb2c718c8 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -8,8 +8,6 @@ #include "hpfs_fn.h" -static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec); - /* * Check if a sector is allocated in bitmap * This is really slow. Turned on only if chk==2 @@ -18,9 +16,9 @@ static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec); static int chk_if_allocated(struct super_block *s, secno sec, char *msg) { struct quad_buffer_head qbh; - unsigned *bmp; + u32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; - if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f)) & 1) { + if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); goto fail1; } @@ -28,7 +26,7 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg) if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) { unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4; if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail; - if ((bmp[ssec >> 5] >> (ssec & 0x1f)) & 1) { + if ((le32_to_cpu(bmp[ssec >> 5]) >> (ssec & 0x1f)) & 1) { hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec); goto fail1; } @@ -75,7 +73,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne hpfs_error(s, "Bad allocation size: %d", n); return 0; } - lock_super(s); if (bs != ~0x3fff) { if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls; } else { @@ -85,10 +82,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne ret = bs + nr; goto rt; } - /*if (!tstbits(bmp, nr + n, n + forward)) { - ret = bs + nr + n; - goto rt; - }*/ q = nr + n; b = 0; while ((a = tstbits(bmp, q, n + forward)) != 0) { q += a; @@ -105,14 +98,14 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne goto rt; } nr >>= 5; - /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) {*/ + /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) */ i = nr; do { - if (!bmp[i]) goto cont; - if (n + forward >= 0x3f && bmp[i] != -1) goto cont; + if (!le32_to_cpu(bmp[i])) goto cont; + if (n + forward >= 0x3f && le32_to_cpu(bmp[i]) != 0xffffffff) goto cont; q = i<<5; if (i > 0) { - unsigned k = bmp[i-1]; + unsigned k = le32_to_cpu(bmp[i-1]); while (k & 0x80000000) { q--; k <<= 1; } @@ -132,18 +125,17 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne } while (i != nr); rt: if (ret) { - if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (bmp[(ret & 0x3fff) >> 5] | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) { + if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (le32_to_cpu(bmp[(ret & 0x3fff) >> 5]) | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) { hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret); ret = 0; goto b; } - bmp[(ret & 0x3fff) >> 5] &= ~(((1 << n) - 1) << (ret & 0x1f)); + bmp[(ret & 0x3fff) >> 5] &= cpu_to_le32(~(((1 << n) - 1) << (ret & 0x1f))); hpfs_mark_4buffers_dirty(&qbh); } b: hpfs_brelse4(&qbh); uls: - unlock_super(s); return ret; } @@ -155,7 +147,7 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne * sectors */ -secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward, int lock) +secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward) { secno sec; int i; @@ -167,7 +159,6 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa forward = -forward; f_p = 1; } - if (lock) hpfs_lock_creation(s); n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14; if (near && near < sbi->sb_fs_size) { if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret; @@ -214,18 +205,17 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa ret: if (sec && f_p) { for (i = 0; i < forward; i++) { - if (!hpfs_alloc_if_possible_nolock(s, sec + i + 1)) { + if (!hpfs_alloc_if_possible(s, sec + i + 1)) { hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i); sec = 0; break; } } } - if (lock) hpfs_unlock_creation(s); return sec; } -static secno alloc_in_dirband(struct super_block *s, secno near, int lock) +static secno alloc_in_dirband(struct super_block *s, secno near) { unsigned nr = near; secno sec; @@ -236,49 +226,35 @@ static secno alloc_in_dirband(struct super_block *s, secno near, int lock) nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4; nr -= sbi->sb_dirband_start; nr >>= 2; - if (lock) hpfs_lock_creation(s); sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0); - if (lock) hpfs_unlock_creation(s); if (!sec) return 0; return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start; } /* Alloc sector if it's free */ -static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec) +int hpfs_alloc_if_possible(struct super_block *s, secno sec) { struct quad_buffer_head qbh; - unsigned *bmp; - lock_super(s); + u32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; - if (bmp[(sec & 0x3fff) >> 5] & (1 << (sec & 0x1f))) { - bmp[(sec & 0x3fff) >> 5] &= ~(1 << (sec & 0x1f)); + if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) { + bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); - unlock_super(s); return 1; } hpfs_brelse4(&qbh); end: - unlock_super(s); return 0; } -int hpfs_alloc_if_possible(struct super_block *s, secno sec) -{ - int r; - hpfs_lock_creation(s); - r = hpfs_alloc_if_possible_nolock(s, sec); - hpfs_unlock_creation(s); - return r; -} - /* Free sectors in bitmaps */ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) { struct quad_buffer_head qbh; - unsigned *bmp; + u32 *bmp; struct hpfs_sb_info *sbi = hpfs_sb(s); /*printk("2 - ");*/ if (!n) return; @@ -286,26 +262,22 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) hpfs_error(s, "Trying to free reserved sector %08x", sec); return; } - lock_super(s); sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n; if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff; new_map: if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) { - unlock_super(s); return; } new_tst: - if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f) & 1)) { + if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f) & 1)) { hpfs_error(s, "sector %08x not allocated", sec); hpfs_brelse4(&qbh); - unlock_super(s); return; } - bmp[(sec & 0x3fff) >> 5] |= 1 << (sec & 0x1f); + bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f)); if (!--n) { hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); - unlock_super(s); return; } if (!(++sec & 0x3fff)) { @@ -327,13 +299,13 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; int i, j; - unsigned *bmp; + u32 *bmp; struct quad_buffer_head qbh; if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { for (j = 0; j < 512; j++) { unsigned k; - if (!bmp[j]) continue; - for (k = bmp[j]; k; k >>= 1) if (k & 1) if (!--n) { + if (!le32_to_cpu(bmp[j])) continue; + for (k = le32_to_cpu(bmp[j]); k; k >>= 1) if (k & 1) if (!--n) { hpfs_brelse4(&qbh); return 0; } @@ -352,10 +324,10 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) chk_bmp: if (bmp) { for (j = 0; j < 512; j++) { - unsigned k; - if (!bmp[j]) continue; + u32 k; + if (!le32_to_cpu(bmp[j])) continue; for (k = 0xf; k; k <<= 4) - if ((bmp[j] & k) == k) { + if ((le32_to_cpu(bmp[j]) & k) == k) { if (!--n) { hpfs_brelse4(&qbh); return 0; @@ -379,44 +351,40 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno) hpfs_free_sectors(s, dno, 4); } else { struct quad_buffer_head qbh; - unsigned *bmp; + u32 *bmp; unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; - lock_super(s); if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { - unlock_super(s); return; } - bmp[ssec >> 5] |= 1 << (ssec & 0x1f); + bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f)); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); - unlock_super(s); } } struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near, - dnode_secno *dno, struct quad_buffer_head *qbh, - int lock) + dnode_secno *dno, struct quad_buffer_head *qbh) { struct dnode *d; if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) { - if (!(*dno = alloc_in_dirband(s, near, lock))) - if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) return NULL; + if (!(*dno = alloc_in_dirband(s, near))) + if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL; } else { - if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) - if (!(*dno = alloc_in_dirband(s, near, lock))) return NULL; + if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) + if (!(*dno = alloc_in_dirband(s, near))) return NULL; } if (!(d = hpfs_get_4sectors(s, *dno, qbh))) { hpfs_free_dnode(s, *dno); return NULL; } memset(d, 0, 2048); - d->magic = DNODE_MAGIC; - d->first_free = 52; + d->magic = cpu_to_le32(DNODE_MAGIC); + d->first_free = cpu_to_le32(52); d->dirent[0] = 32; d->dirent[2] = 8; d->dirent[30] = 1; d->dirent[31] = 255; - d->self = *dno; + d->self = cpu_to_le32(*dno); return d; } @@ -424,16 +392,16 @@ struct fnode *hpfs_alloc_fnode(struct super_block *s, secno near, fnode_secno *f struct buffer_head **bh) { struct fnode *f; - if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD, 1))) return NULL; + if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD))) return NULL; if (!(f = hpfs_get_sector(s, *fno, bh))) { hpfs_free_sectors(s, *fno, 1); return NULL; } memset(f, 0, 512); - f->magic = FNODE_MAGIC; - f->ea_offs = 0xc4; + f->magic = cpu_to_le32(FNODE_MAGIC); + f->ea_offs = cpu_to_le16(0xc4); f->btree.n_free_nodes = 8; - f->btree.first_free = 8; + f->btree.first_free = cpu_to_le16(8); return f; } @@ -441,16 +409,16 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a struct buffer_head **bh) { struct anode *a; - if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD, 1))) return NULL; + if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD))) return NULL; if (!(a = hpfs_get_sector(s, *ano, bh))) { hpfs_free_sectors(s, *ano, 1); return NULL; } memset(a, 0, 512); - a->magic = ANODE_MAGIC; - a->self = *ano; + a->magic = cpu_to_le32(ANODE_MAGIC); + a->self = cpu_to_le32(*ano); a->btree.n_free_nodes = 40; a->btree.n_used_nodes = 0; - a->btree.first_free = 8; + a->btree.first_free = cpu_to_le16(8); return a; } diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 6a2f04bf3df0..08b503e8ed29 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -22,8 +22,8 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; if (btree->internal) { for (i = 0; i < btree->n_used_nodes; i++) - if (btree->u.internal[i].file_secno > sec) { - a = btree->u.internal[i].down; + if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) { + a = le32_to_cpu(btree->u.internal[i].down); brelse(bh); if (!(anode = hpfs_map_anode(s, a, &bh))) return -1; btree = &anode->btree; @@ -34,18 +34,18 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, return -1; } for (i = 0; i < btree->n_used_nodes; i++) - if (btree->u.external[i].file_secno <= sec && - btree->u.external[i].file_secno + btree->u.external[i].length > sec) { - a = btree->u.external[i].disk_secno + sec - btree->u.external[i].file_secno; + if (le32_to_cpu(btree->u.external[i].file_secno) <= sec && + le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > sec) { + a = le32_to_cpu(btree->u.external[i].disk_secno) + sec - le32_to_cpu(btree->u.external[i].file_secno); if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) { brelse(bh); return -1; } if (inode) { struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); - hpfs_inode->i_file_sec = btree->u.external[i].file_secno; - hpfs_inode->i_disk_sec = btree->u.external[i].disk_secno; - hpfs_inode->i_n_secs = btree->u.external[i].length; + hpfs_inode->i_file_sec = le32_to_cpu(btree->u.external[i].file_secno); + hpfs_inode->i_disk_sec = le32_to_cpu(btree->u.external[i].disk_secno); + hpfs_inode->i_n_secs = le32_to_cpu(btree->u.external[i].length); } brelse(bh); return a; @@ -83,8 +83,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi return -1; } if (btree->internal) { - a = btree->u.internal[n].down; - btree->u.internal[n].file_secno = -1; + a = le32_to_cpu(btree->u.internal[n].down); + btree->u.internal[n].file_secno = cpu_to_le32(-1); mark_buffer_dirty(bh); brelse(bh); if (hpfs_sb(s)->sb_chk) @@ -94,15 +94,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi goto go_down; } if (n >= 0) { - if (btree->u.external[n].file_secno + btree->u.external[n].length != fsecno) { + if (le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length) != fsecno) { hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x", - btree->u.external[n].file_secno + btree->u.external[n].length, fsecno, + le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length), fsecno, fnod?'f':'a', node); brelse(bh); return -1; } - if (hpfs_alloc_if_possible(s, se = btree->u.external[n].disk_secno + btree->u.external[n].length)) { - btree->u.external[n].length++; + if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) { + btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1); mark_buffer_dirty(bh); brelse(bh); return se; @@ -115,20 +115,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } se = !fnod ? node : (node + 16384) & ~16383; } - if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M, 1))) { + if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M))) { brelse(bh); return -1; } - fs = n < 0 ? 0 : btree->u.external[n].file_secno + btree->u.external[n].length; + fs = n < 0 ? 0 : le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length); if (!btree->n_free_nodes) { - up = a != node ? anode->up : -1; + up = a != node ? le32_to_cpu(anode->up) : -1; if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) { brelse(bh); hpfs_free_sectors(s, se, 1); return -1; } if (a == node && fnod) { - anode->up = node; + anode->up = cpu_to_le32(node); anode->btree.fnode_parent = 1; anode->btree.n_used_nodes = btree->n_used_nodes; anode->btree.first_free = btree->first_free; @@ -137,9 +137,9 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi btree->internal = 1; btree->n_free_nodes = 11; btree->n_used_nodes = 1; - btree->first_free = (char *)&(btree->u.internal[1]) - (char *)btree; - btree->u.internal[0].file_secno = -1; - btree->u.internal[0].down = na; + btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree); + btree->u.internal[0].file_secno = cpu_to_le32(-1); + btree->u.internal[0].down = cpu_to_le32(na); mark_buffer_dirty(bh); } else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) { brelse(bh); @@ -153,15 +153,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi btree = &anode->btree; } btree->n_free_nodes--; n = btree->n_used_nodes++; - btree->first_free += 12; - btree->u.external[n].disk_secno = se; - btree->u.external[n].file_secno = fs; - btree->u.external[n].length = 1; + btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12); + btree->u.external[n].disk_secno = cpu_to_le32(se); + btree->u.external[n].file_secno = cpu_to_le32(fs); + btree->u.external[n].length = cpu_to_le32(1); mark_buffer_dirty(bh); brelse(bh); if ((a == node && fnod) || na == -1) return se; c2 = 0; - while (up != -1) { + while (up != (anode_secno)-1) { struct anode *new_anode; if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1; @@ -174,47 +174,47 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if (btree->n_free_nodes) { btree->n_free_nodes--; n = btree->n_used_nodes++; - btree->first_free += 8; - btree->u.internal[n].file_secno = -1; - btree->u.internal[n].down = na; - btree->u.internal[n-1].file_secno = fs; + btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8); + btree->u.internal[n].file_secno = cpu_to_le32(-1); + btree->u.internal[n].down = cpu_to_le32(na); + btree->u.internal[n-1].file_secno = cpu_to_le32(fs); mark_buffer_dirty(bh); brelse(bh); brelse(bh2); hpfs_free_sectors(s, ra, 1); if ((anode = hpfs_map_anode(s, na, &bh))) { - anode->up = up; + anode->up = cpu_to_le32(up); anode->btree.fnode_parent = up == node && fnod; mark_buffer_dirty(bh); brelse(bh); } return se; } - up = up != node ? anode->up : -1; - btree->u.internal[btree->n_used_nodes - 1].file_secno = /*fs*/-1; + up = up != node ? le32_to_cpu(anode->up) : -1; + btree->u.internal[btree->n_used_nodes - 1].file_secno = cpu_to_le32(/*fs*/-1); mark_buffer_dirty(bh); brelse(bh); a = na; if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { anode = new_anode; - /*anode->up = up != -1 ? up : ra;*/ + /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/ anode->btree.internal = 1; anode->btree.n_used_nodes = 1; anode->btree.n_free_nodes = 59; - anode->btree.first_free = 16; - anode->btree.u.internal[0].down = a; - anode->btree.u.internal[0].file_secno = -1; + anode->btree.first_free = cpu_to_le16(16); + anode->btree.u.internal[0].down = cpu_to_le32(a); + anode->btree.u.internal[0].file_secno = cpu_to_le32(-1); mark_buffer_dirty(bh); brelse(bh); if ((anode = hpfs_map_anode(s, a, &bh))) { - anode->up = na; + anode->up = cpu_to_le32(na); mark_buffer_dirty(bh); brelse(bh); } } else na = a; } if ((anode = hpfs_map_anode(s, na, &bh))) { - anode->up = node; + anode->up = cpu_to_le32(node); if (fnod) anode->btree.fnode_parent = 1; mark_buffer_dirty(bh); brelse(bh); @@ -232,14 +232,14 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } btree = &fnode->btree; } - ranode->up = node; - memcpy(&ranode->btree, btree, btree->first_free); + ranode->up = cpu_to_le32(node); + memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); if (fnod) ranode->btree.fnode_parent = 1; ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes; if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { struct anode *unode; - if ((unode = hpfs_map_anode(s, ranode->u.internal[n].down, &bh1))) { - unode->up = ra; + if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { + unode->up = cpu_to_le32(ra); unode->btree.fnode_parent = 0; mark_buffer_dirty(bh1); brelse(bh1); @@ -248,11 +248,11 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi btree->internal = 1; btree->n_free_nodes = fnod ? 10 : 58; btree->n_used_nodes = 2; - btree->first_free = (char *)&btree->u.internal[2] - (char *)btree; - btree->u.internal[0].file_secno = fs; - btree->u.internal[0].down = ra; - btree->u.internal[1].file_secno = -1; - btree->u.internal[1].down = na; + btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree); + btree->u.internal[0].file_secno = cpu_to_le32(fs); + btree->u.internal[0].down = cpu_to_le32(ra); + btree->u.internal[1].file_secno = cpu_to_le32(-1); + btree->u.internal[1].down = cpu_to_le32(na); mark_buffer_dirty(bh); brelse(bh); mark_buffer_dirty(bh2); @@ -279,7 +279,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) go_down: d2 = 0; while (btree1->internal) { - ano = btree1->u.internal[pos].down; + ano = le32_to_cpu(btree1->u.internal[pos].down); if (level) brelse(bh); if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1")) @@ -290,7 +290,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) pos = 0; } for (i = 0; i < btree1->n_used_nodes; i++) - hpfs_free_sectors(s, btree1->u.external[i].disk_secno, btree1->u.external[i].length); + hpfs_free_sectors(s, le32_to_cpu(btree1->u.external[i].disk_secno), le32_to_cpu(btree1->u.external[i].length)); go_up: if (!level) return; brelse(bh); @@ -298,13 +298,13 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return; hpfs_free_sectors(s, ano, 1); oano = ano; - ano = anode->up; + ano = le32_to_cpu(anode->up); if (--level) { if (!(anode = hpfs_map_anode(s, ano, &bh))) return; btree1 = &anode->btree; } else btree1 = btree; for (i = 0; i < btree1->n_used_nodes; i++) { - if (btree1->u.internal[i].down == oano) { + if (le32_to_cpu(btree1->u.internal[i].down) == oano) { if ((pos = i + 1) < btree1->n_used_nodes) goto go_down; else @@ -411,7 +411,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) if (fno) { btree->n_free_nodes = 8; btree->n_used_nodes = 0; - btree->first_free = 8; + btree->first_free = cpu_to_le16(8); btree->internal = 0; mark_buffer_dirty(bh); } else hpfs_free_sectors(s, f, 1); @@ -421,22 +421,22 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) while (btree->internal) { nodes = btree->n_used_nodes + btree->n_free_nodes; for (i = 0; i < btree->n_used_nodes; i++) - if (btree->u.internal[i].file_secno >= secs) goto f; + if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f; brelse(bh); hpfs_error(s, "internal btree %08x doesn't end with -1", node); return; f: for (j = i + 1; j < btree->n_used_nodes; j++) - hpfs_ea_remove(s, btree->u.internal[j].down, 1, 0); + hpfs_ea_remove(s, le32_to_cpu(btree->u.internal[j].down), 1, 0); btree->n_used_nodes = i + 1; btree->n_free_nodes = nodes - btree->n_used_nodes; - btree->first_free = 8 + 8 * btree->n_used_nodes; + btree->first_free = cpu_to_le16(8 + 8 * btree->n_used_nodes); mark_buffer_dirty(bh); - if (btree->u.internal[i].file_secno == secs) { + if (btree->u.internal[i].file_secno == cpu_to_le32(secs)) { brelse(bh); return; } - node = btree->u.internal[i].down; + node = le32_to_cpu(btree->u.internal[i].down); brelse(bh); if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree")) @@ -446,25 +446,25 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) } nodes = btree->n_used_nodes + btree->n_free_nodes; for (i = 0; i < btree->n_used_nodes; i++) - if (btree->u.external[i].file_secno + btree->u.external[i].length >= secs) goto ff; + if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) >= secs) goto ff; brelse(bh); return; ff: - if (secs <= btree->u.external[i].file_secno) { + if (secs <= le32_to_cpu(btree->u.external[i].file_secno)) { hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs); if (i) i--; } - else if (btree->u.external[i].file_secno + btree->u.external[i].length > secs) { - hpfs_free_sectors(s, btree->u.external[i].disk_secno + secs - - btree->u.external[i].file_secno, btree->u.external[i].length - - secs + btree->u.external[i].file_secno); /* I hope gcc optimizes this :-) */ - btree->u.external[i].length = secs - btree->u.external[i].file_secno; + else if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > secs) { + hpfs_free_sectors(s, le32_to_cpu(btree->u.external[i].disk_secno) + secs - + le32_to_cpu(btree->u.external[i].file_secno), le32_to_cpu(btree->u.external[i].length) + - secs + le32_to_cpu(btree->u.external[i].file_secno)); /* I hope gcc optimizes this :-) */ + btree->u.external[i].length = cpu_to_le32(secs - le32_to_cpu(btree->u.external[i].file_secno)); } for (j = i + 1; j < btree->n_used_nodes; j++) - hpfs_free_sectors(s, btree->u.external[j].disk_secno, btree->u.external[j].length); + hpfs_free_sectors(s, le32_to_cpu(btree->u.external[j].disk_secno), le32_to_cpu(btree->u.external[j].length)); btree->n_used_nodes = i + 1; btree->n_free_nodes = nodes - btree->n_used_nodes; - btree->first_free = 8 + 12 * btree->n_used_nodes; + btree->first_free = cpu_to_le16(8 + 12 * btree->n_used_nodes); mark_buffer_dirty(bh); brelse(bh); } @@ -480,12 +480,12 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) struct extended_attribute *ea_end; if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); - else hpfs_remove_dtree(s, fnode->u.external[0].disk_secno); + else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (ea->indirect) hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); - hpfs_ea_ext_remove(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l); + hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); brelse(bh); hpfs_free_sectors(s, fno, 1); } diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index 793cb9d943d2..9ecde27d1e29 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c @@ -9,22 +9,6 @@ #include <linux/slab.h> #include "hpfs_fn.h" -void hpfs_lock_creation(struct super_block *s) -{ -#ifdef DEBUG_LOCKS - printk("lock creation\n"); -#endif - mutex_lock(&hpfs_sb(s)->hpfs_creation_de); -} - -void hpfs_unlock_creation(struct super_block *s) -{ -#ifdef DEBUG_LOCKS - printk("unlock creation\n"); -#endif - mutex_unlock(&hpfs_sb(s)->hpfs_creation_de); -} - /* Map a sector into a buffer and return pointers to it and to the buffer. */ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, @@ -32,6 +16,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head { struct buffer_head *bh; + hpfs_lock_assert(s); + cond_resched(); *bhp = bh = sb_bread(s, secno); @@ -50,6 +36,8 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head struct buffer_head *bh; /*return hpfs_map_sector(s, secno, bhp, 0);*/ + hpfs_lock_assert(s); + cond_resched(); if ((*bhp = bh = sb_getblk(s, secno)) != NULL) { @@ -70,6 +58,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe struct buffer_head *bh; char *data; + hpfs_lock_assert(s); + cond_resched(); if (secno & 3) { @@ -125,6 +115,8 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno, { cond_resched(); + hpfs_lock_assert(s); + if (secno & 3) { printk("HPFS: hpfs_get_4sectors: unaligned read\n"); return NULL; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index b3d7c0ddb609..f46ae025bfb5 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -88,9 +88,9 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) hpfs_error(inode->i_sb, "not a directory, fnode %08lx", (unsigned long)inode->i_ino); } - if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) { + if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) { e = 1; - hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, fno->u.external[0].disk_secno); + hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, le32_to_cpu(fno->u.external[0].disk_secno)); } brelse(bh); if (e) { @@ -156,7 +156,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto again; } tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); - if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) { + if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) { filp->f_pos = old_pos; if (tempname != de->name) kfree(tempname); hpfs_brelse4(&qbh); @@ -221,7 +221,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name * Get inode number, what we're after. */ - ino = de->fnode; + ino = le32_to_cpu(de->fnode); /* * Go find or make an inode. @@ -236,7 +236,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name hpfs_init_inode(result); if (de->directory) hpfs_read_inode(result); - else if (de->ea_size && hpfs_sb(dir->i_sb)->sb_eas) + else if (le32_to_cpu(de->ea_size) && hpfs_sb(dir->i_sb)->sb_eas) hpfs_read_inode(result); else { result->i_mode |= S_IFREG; @@ -250,8 +250,6 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name hpfs_result = hpfs_i(result); if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino; - hpfs_decide_conv(result, name, len); - if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) { hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); goto bail1; @@ -263,19 +261,19 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name */ if (!result->i_ctime.tv_sec) { - if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, de->creation_date))) + if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date)))) result->i_ctime.tv_sec = 1; result->i_ctime.tv_nsec = 0; - result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, de->write_date); + result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date)); result->i_mtime.tv_nsec = 0; - result->i_atime.tv_sec = local_to_gmt(dir->i_sb, de->read_date); + result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date)); result->i_atime.tv_nsec = 0; - hpfs_result->i_ea_size = de->ea_size; + hpfs_result->i_ea_size = le32_to_cpu(de->ea_size); if (!hpfs_result->i_ea_mode && de->read_only) result->i_mode &= ~0222; if (!de->directory) { if (result->i_size == -1) { - result->i_size = de->file_size; + result->i_size = le32_to_cpu(de->file_size); result->i_data.a_ops = &hpfs_aops; hpfs_i(result)->mmu_private = result->i_size; /* diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 9b2ffadfc8c4..1e0e2ac30fd3 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -14,11 +14,11 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde) struct hpfs_dirent *de_end = dnode_end_de(d); int i = 1; for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) { - if (de == fde) return ((loff_t) d->self << 4) | (loff_t)i; + if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i; i++; } printk("HPFS: get_pos: not_found\n"); - return ((loff_t)d->self << 4) | (loff_t)1; + return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1; } void hpfs_add_pos(struct inode *inode, loff_t *pos) @@ -130,29 +130,30 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno { struct hpfs_dirent *de; if (!(de = dnode_last_de(d))) { - hpfs_error(s, "set_last_pointer: empty dnode %08x", d->self); + hpfs_error(s, "set_last_pointer: empty dnode %08x", le32_to_cpu(d->self)); return; } if (hpfs_sb(s)->sb_chk) { if (de->down) { hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x", - d->self, de_down_pointer(de)); + le32_to_cpu(d->self), de_down_pointer(de)); return; } - if (de->length != 32) { - hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", d->self); + if (le16_to_cpu(de->length) != 32) { + hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", le32_to_cpu(d->self)); return; } } if (ptr) { - if ((d->first_free += 4) > 2048) { - hpfs_error(s,"set_last_pointer: too long dnode %08x", d->self); - d->first_free -= 4; + d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4); + if (le32_to_cpu(d->first_free) > 2048) { + hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self)); + d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4); return; } - de->length = 36; + de->length = cpu_to_le16(36); de->down = 1; - *(dnode_secno *)((char *)de + 32) = ptr; + *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr); } } @@ -168,7 +169,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) { int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last); if (!c) { - hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, d->self); + hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, le32_to_cpu(d->self)); return NULL; } if (c < 0) break; @@ -176,15 +177,14 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, memmove((char *)de + d_size, de, (char *)de_end - (char *)de); memset(de, 0, d_size); if (down_ptr) { - *(int *)((char *)de + d_size - 4) = down_ptr; + *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); de->down = 1; } - de->length = d_size; - if (down_ptr) de->down = 1; + de->length = cpu_to_le16(d_size); de->not_8x3 = hpfs_is_name_long(name, namelen); de->namelen = namelen; memcpy(de->name, name, namelen); - d->first_free += d_size; + d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size); return de; } @@ -194,25 +194,25 @@ static void hpfs_delete_de(struct super_block *s, struct dnode *d, struct hpfs_dirent *de) { if (de->last) { - hpfs_error(s, "attempt to delete last dirent in dnode %08x", d->self); + hpfs_error(s, "attempt to delete last dirent in dnode %08x", le32_to_cpu(d->self)); return; } - d->first_free -= de->length; - memmove(de, de_next_de(de), d->first_free + (char *)d - (char *)de); + d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - le16_to_cpu(de->length)); + memmove(de, de_next_de(de), le32_to_cpu(d->first_free) + (char *)d - (char *)de); } static void fix_up_ptrs(struct super_block *s, struct dnode *d) { struct hpfs_dirent *de; struct hpfs_dirent *de_end = dnode_end_de(d); - dnode_secno dno = d->self; + dnode_secno dno = le32_to_cpu(d->self); for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) if (de->down) { struct quad_buffer_head qbh; struct dnode *dd; if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) { - if (dd->up != dno || dd->root_dnode) { - dd->up = dno; + if (le32_to_cpu(dd->up) != dno || dd->root_dnode) { + dd->up = cpu_to_le32(dno); dd->root_dnode = 0; hpfs_mark_4buffers_dirty(&qbh); } @@ -262,7 +262,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, kfree(nname); return 1; } - if (d->first_free + de_size(namelen, down_ptr) <= 2048) { + if (le32_to_cpu(d->first_free) + de_size(namelen, down_ptr) <= 2048) { loff_t t; copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de); t = get_pos(d, de); @@ -286,11 +286,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, kfree(nname); return 1; } - memcpy(nd, d, d->first_free); + memcpy(nd, d, le32_to_cpu(d->first_free)); copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de); for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1); h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10; - if (!(ad = hpfs_alloc_dnode(i->i_sb, d->up, &adno, &qbh1, 0))) { + if (!(ad = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &adno, &qbh1))) { hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted"); hpfs_brelse4(&qbh); kfree(nd); @@ -313,20 +313,21 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, down_ptr = adno; set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); de = de_next_de(de); - memmove((char *)nd + 20, de, nd->first_free + (char *)nd - (char *)de); - nd->first_free -= (char *)de - (char *)nd - 20; - memcpy(d, nd, nd->first_free); + memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de); + nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20)); + memcpy(d, nd, le32_to_cpu(nd->first_free)); for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); fix_up_ptrs(i->i_sb, ad); if (!d->root_dnode) { - dno = ad->up = d->up; + ad->up = d->up; + dno = le32_to_cpu(ad->up); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); hpfs_mark_4buffers_dirty(&qbh1); hpfs_brelse4(&qbh1); goto go_up; } - if (!(rd = hpfs_alloc_dnode(i->i_sb, d->up, &rdno, &qbh2, 0))) { + if (!(rd = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &rdno, &qbh2))) { hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted"); hpfs_brelse4(&qbh); hpfs_brelse4(&qbh1); @@ -338,7 +339,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, i->i_blocks += 4; rd->root_dnode = 1; rd->up = d->up; - if (!(fnode = hpfs_map_fnode(i->i_sb, d->up, &bh))) { + if (!(fnode = hpfs_map_fnode(i->i_sb, le32_to_cpu(d->up), &bh))) { hpfs_free_dnode(i->i_sb, rdno); hpfs_brelse4(&qbh); hpfs_brelse4(&qbh1); @@ -347,10 +348,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, kfree(nname); return 1; } - fnode->u.external[0].disk_secno = rdno; + fnode->u.external[0].disk_secno = cpu_to_le32(rdno); mark_buffer_dirty(bh); brelse(bh); - d->up = ad->up = hpfs_i(i)->i_dno = rdno; + hpfs_i(i)->i_dno = rdno; + d->up = ad->up = cpu_to_le32(rdno); d->root_dnode = ad->root_dnode = 0; hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); @@ -373,7 +375,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, int hpfs_add_dirent(struct inode *i, const unsigned char *name, unsigned namelen, - struct hpfs_dirent *new_de, int cdepth) + struct hpfs_dirent *new_de) { struct hpfs_inode_info *hpfs_inode = hpfs_i(i); struct dnode *d; @@ -403,7 +405,6 @@ int hpfs_add_dirent(struct inode *i, } } hpfs_brelse4(&qbh); - if (!cdepth) hpfs_lock_creation(i->i_sb); if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) { c = 1; goto ret; @@ -411,7 +412,6 @@ int hpfs_add_dirent(struct inode *i, i->i_version++; c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0); ret: - if (!cdepth) hpfs_unlock_creation(i->i_sb); return c; } @@ -437,9 +437,9 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to) return 0; if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0; if (hpfs_sb(i->i_sb)->sb_chk) { - if (dnode->up != chk_up) { + if (le32_to_cpu(dnode->up) != chk_up) { hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x", - dno, chk_up, dnode->up); + dno, chk_up, le32_to_cpu(dnode->up)); hpfs_brelse4(&qbh); return 0; } @@ -455,7 +455,7 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to) hpfs_brelse4(&qbh); } while (!(de = dnode_pre_last_de(dnode))) { - dnode_secno up = dnode->up; + dnode_secno up = le32_to_cpu(dnode->up); hpfs_brelse4(&qbh); hpfs_free_dnode(i->i_sb, dno); i->i_size -= 2048; @@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to) hpfs_brelse4(&qbh); return 0; } - dnode->first_free -= 4; - de->length -= 4; + dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); + de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); de->down = 0; hpfs_mark_4buffers_dirty(&qbh); dno = up; @@ -483,12 +483,12 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to) t = get_pos(dnode, de); for_all_poss(i, hpfs_pos_subst, t, 4); for_all_poss(i, hpfs_pos_subst, t + 1, 5); - if (!(nde = kmalloc(de->length, GFP_NOFS))) { + if (!(nde = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) { hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted"); hpfs_brelse4(&qbh); return 0; } - memcpy(nde, de, de->length); + memcpy(nde, de, le16_to_cpu(de->length)); ddno = de->down ? de_down_pointer(de) : 0; hpfs_delete_de(i->i_sb, dnode, de); set_last_pointer(i->i_sb, dnode, ddno); @@ -517,11 +517,11 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) try_it_again: if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return; if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return; - if (dnode->first_free > 56) goto end; - if (dnode->first_free == 52 || dnode->first_free == 56) { + if (le32_to_cpu(dnode->first_free) > 56) goto end; + if (le32_to_cpu(dnode->first_free) == 52 || le32_to_cpu(dnode->first_free) == 56) { struct hpfs_dirent *de_end; int root = dnode->root_dnode; - up = dnode->up; + up = le32_to_cpu(dnode->up); de = dnode_first_de(dnode); down = de->down ? de_down_pointer(de) : 0; if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) { @@ -545,13 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) return; } if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { - d1->up = up; + d1->up = cpu_to_le32(up); d1->root_dnode = 1; hpfs_mark_4buffers_dirty(&qbh1); hpfs_brelse4(&qbh1); } if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) { - fnode->u.external[0].disk_secno = down; + fnode->u.external[0].disk_secno = cpu_to_le32(down); mark_buffer_dirty(bh); brelse(bh); } @@ -570,22 +570,22 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); if (!down) { de->down = 0; - de->length -= 4; - dnode->first_free -= 4; + de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); + dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); memmove(de_next_de(de), (char *)de_next_de(de) + 4, - (char *)dnode + dnode->first_free - (char *)de_next_de(de)); + (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de)); } else { struct dnode *d1; struct quad_buffer_head qbh1; - *(dnode_secno *) ((void *) de + de->length - 4) = down; + *(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4) = down; if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { - d1->up = up; + d1->up = cpu_to_le32(up); hpfs_mark_4buffers_dirty(&qbh1); hpfs_brelse4(&qbh1); } } } else { - hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, dnode->first_free); + hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, le32_to_cpu(dnode->first_free)); goto end; } @@ -596,18 +596,18 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) struct quad_buffer_head qbh1; if (!de_next->down) goto endm; ndown = de_down_pointer(de_next); - if (!(de_cp = kmalloc(de->length, GFP_NOFS))) { + if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) { printk("HPFS: out of memory for dtree balancing\n"); goto endm; } - memcpy(de_cp, de, de->length); + memcpy(de_cp, de, le16_to_cpu(de->length)); hpfs_delete_de(i->i_sb, dnode, de); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4); for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1); if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) { - d1->up = ndown; + d1->up = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh1); hpfs_brelse4(&qbh1); } @@ -635,7 +635,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) struct hpfs_dirent *del = dnode_last_de(d1); dlp = del->down ? de_down_pointer(del) : 0; if (!dlp && down) { - if (d1->first_free > 2044) { + if (le32_to_cpu(d1->first_free) > 2044) { if (hpfs_sb(i->i_sb)->sb_chk >= 2) { printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); printk("HPFS: warning: terminating balancing operation\n"); @@ -647,38 +647,38 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); printk("HPFS: warning: goin'on\n"); } - del->length += 4; + del->length = cpu_to_le16(le16_to_cpu(del->length) + 4); del->down = 1; - d1->first_free += 4; + d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4); } if (dlp && !down) { - del->length -= 4; + del->length = cpu_to_le16(le16_to_cpu(del->length) - 4); del->down = 0; - d1->first_free -= 4; + d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); } else if (down) - *(dnode_secno *) ((void *) del + del->length - 4) = down; + *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); } else goto endm; - if (!(de_cp = kmalloc(de_prev->length, GFP_NOFS))) { + if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { printk("HPFS: out of memory for dtree balancing\n"); hpfs_brelse4(&qbh1); goto endm; } hpfs_mark_4buffers_dirty(&qbh1); hpfs_brelse4(&qbh1); - memcpy(de_cp, de_prev, de_prev->length); + memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length)); hpfs_delete_de(i->i_sb, dnode, de_prev); if (!de_prev->down) { - de_prev->length += 4; + de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4); de_prev->down = 1; - dnode->first_free += 4; + dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); } - *(dnode_secno *) ((void *) de_prev + de_prev->length - 4) = ndown; + *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1)); if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) { - d1->up = ndown; + d1->up = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh1); hpfs_brelse4(&qbh1); } @@ -701,7 +701,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de, { struct dnode *dnode = qbh->data; dnode_secno down = 0; - int lock = 0; loff_t t; if (de->first || de->last) { hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno); @@ -710,11 +709,8 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de, } if (de->down) down = de_down_pointer(de); if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) { - lock = 1; - hpfs_lock_creation(i->i_sb); if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) { hpfs_brelse4(qbh); - hpfs_unlock_creation(i->i_sb); return 2; } } @@ -727,11 +723,9 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de, dnode_secno a = move_to_top(i, down, dno); for_all_poss(i, hpfs_pos_subst, 5, t); if (a) delete_empty_dnode(i, a); - if (lock) hpfs_unlock_creation(i->i_sb); return !a; } delete_empty_dnode(i, dno); - if (lock) hpfs_unlock_creation(i->i_sb); return 0; } @@ -751,8 +745,8 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes, ptr = 0; go_up: if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return; - if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && dnode->up != odno) - hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, dnode->up); + if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && le32_to_cpu(dnode->up) != odno) + hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, le32_to_cpu(dnode->up)); de = dnode_first_de(dnode); if (ptr) while(1) { if (de->down) if (de_down_pointer(de) == ptr) goto process_de; @@ -776,7 +770,7 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes, if (!de->first && !de->last && n_items) (*n_items)++; if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de; ptr = dno; - dno = dnode->up; + dno = le32_to_cpu(dnode->up); if (dnode->root_dnode) { hpfs_brelse4(&qbh); return; @@ -824,8 +818,8 @@ dnode_secno hpfs_de_as_down_as_possible(struct super_block *s, dnode_secno dno) return d; if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno; if (hpfs_sb(s)->sb_chk) - if (up && ((struct dnode *)qbh.data)->up != up) - hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, ((struct dnode *)qbh.data)->up); + if (up && le32_to_cpu(((struct dnode *)qbh.data)->up) != up) + hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, le32_to_cpu(((struct dnode *)qbh.data)->up)); if (!de->down) { hpfs_brelse4(&qbh); return d; @@ -874,7 +868,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, /* Going up */ if (dnode->root_dnode) goto bail; - if (!(up_dnode = hpfs_map_dnode(inode->i_sb, dnode->up, &qbh0))) + if (!(up_dnode = hpfs_map_dnode(inode->i_sb, le32_to_cpu(dnode->up), &qbh0))) goto bail; end_up_de = dnode_end_de(up_dnode); @@ -882,16 +876,16 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, for (up_de = dnode_first_de(up_dnode); up_de < end_up_de; up_de = de_next_de(up_de)) { if (!(++c & 077)) hpfs_error(inode->i_sb, - "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", dnode->up); + "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", le32_to_cpu(dnode->up)); if (up_de->down && de_down_pointer(up_de) == dno) { - *posp = ((loff_t) dnode->up << 4) + c; + *posp = ((loff_t) le32_to_cpu(dnode->up) << 4) + c; hpfs_brelse4(&qbh0); return de; } } hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x", - dno, dnode->up); + dno, le32_to_cpu(dnode->up)); hpfs_brelse4(&qbh0); bail: @@ -1017,17 +1011,17 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, /*name2[15] = 0xff;*/ name1len = 15; name2len = 256; } - if (!(upf = hpfs_map_fnode(s, f->up, &bh))) { + if (!(upf = hpfs_map_fnode(s, le32_to_cpu(f->up), &bh))) { kfree(name2); return NULL; } if (!upf->dirflag) { brelse(bh); - hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, f->up); + hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up)); kfree(name2); return NULL; } - dno = upf->u.external[0].disk_secno; + dno = le32_to_cpu(upf->u.external[0].disk_secno); brelse(bh); go_down: downd = 0; @@ -1049,7 +1043,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, return NULL; } next_de: - if (de->fnode == fno) { + if (le32_to_cpu(de->fnode) == fno) { kfree(name2); return de; } @@ -1065,7 +1059,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, goto go_down; } f: - if (de->fnode == fno) { + if (le32_to_cpu(de->fnode) == fno) { kfree(name2); return de; } @@ -1074,7 +1068,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, if ((de = de_next_de(de)) < de_end) goto next_de; if (d->root_dnode) goto not_found; downd = dno; - dno = d->up; + dno = le32_to_cpu(d->up); hpfs_brelse4(qbh); if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) { diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index 45e53d972b42..d8b84d113c89 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -24,7 +24,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; if (ea->indirect) { - if (ea->valuelen != 8) { + if (ea_valuelen(ea) != 8) { hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", ano ? "anode" : "sectors", a, pos); return; @@ -33,7 +33,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) return; hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); } - pos += ea->namelen + ea->valuelen + 5; + pos += ea->namelen + ea_valuelen(ea) + 5; } if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9); else { @@ -76,24 +76,24 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, unsigned pos; int ano, len; secno a; + char ex[4 + 255 + 1 + 8]; struct extended_attribute *ea; struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { if (ea->indirect) goto indirect; - if (ea->valuelen >= size) + if (ea_valuelen(ea) >= size) return -EINVAL; - memcpy(buf, ea_data(ea), ea->valuelen); - buf[ea->valuelen] = 0; + memcpy(buf, ea_data(ea), ea_valuelen(ea)); + buf[ea_valuelen(ea)] = 0; return 0; } - a = fnode->ea_secno; - len = fnode->ea_size_l; + a = le32_to_cpu(fnode->ea_secno); + len = le32_to_cpu(fnode->ea_size_l); ano = fnode->ea_anode; pos = 0; while (pos < len) { - char ex[4 + 255 + 1 + 8]; ea = (struct extended_attribute *)ex; if (pos + 4 > len) { hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x", @@ -106,14 +106,14 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, if (!strcmp(ea->name, key)) { if (ea->indirect) goto indirect; - if (ea->valuelen >= size) + if (ea_valuelen(ea) >= size) return -EINVAL; - if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, buf)) + if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), buf)) return -EIO; - buf[ea->valuelen] = 0; + buf[ea_valuelen(ea)] = 0; return 0; } - pos += ea->namelen + ea->valuelen + 5; + pos += ea->namelen + ea_valuelen(ea) + 5; } return -ENOENT; indirect: @@ -138,16 +138,16 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si if (!strcmp(ea->name, key)) { if (ea->indirect) return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); - if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) { + if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; } - memcpy(ret, ea_data(ea), ea->valuelen); - ret[ea->valuelen] = 0; + memcpy(ret, ea_data(ea), ea_valuelen(ea)); + ret[ea_valuelen(ea)] = 0; return ret; } - a = fnode->ea_secno; - len = fnode->ea_size_l; + a = le32_to_cpu(fnode->ea_secno); + len = le32_to_cpu(fnode->ea_size_l); ano = fnode->ea_anode; pos = 0; while (pos < len) { @@ -164,18 +164,18 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si if (!strcmp(ea->name, key)) { if (ea->indirect) return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); - if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) { + if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; } - if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, ret)) { + if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) { kfree(ret); return NULL; } - ret[ea->valuelen] = 0; + ret[ea_valuelen(ea)] = 0; return ret; } - pos += ea->namelen + ea->valuelen + 5; + pos += ea->namelen + ea_valuelen(ea) + 5; } return NULL; } @@ -202,13 +202,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, if (ea->indirect) { if (ea_len(ea) == size) set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); - } else if (ea->valuelen == size) { + } else if (ea_valuelen(ea) == size) { memcpy(ea_data(ea), data, size); } return; } - a = fnode->ea_secno; - len = fnode->ea_size_l; + a = le32_to_cpu(fnode->ea_secno); + len = le32_to_cpu(fnode->ea_size_l); ano = fnode->ea_anode; pos = 0; while (pos < len) { @@ -228,68 +228,70 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); } else { - if (ea->valuelen == size) + if (ea_valuelen(ea) == size) hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data); } return; } - pos += ea->namelen + ea->valuelen + 5; + pos += ea->namelen + ea_valuelen(ea) + 5; } - if (!fnode->ea_offs) { - /*if (fnode->ea_size_s) { + if (!le16_to_cpu(fnode->ea_offs)) { + /*if (le16_to_cpu(fnode->ea_size_s)) { hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0", - inode->i_ino, fnode->ea_size_s); + inode->i_ino, le16_to_cpu(fnode->ea_size_s)); return; }*/ - fnode->ea_offs = 0xc4; + fnode->ea_offs = cpu_to_le16(0xc4); } - if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) { + if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) { hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", (unsigned long)inode->i_ino, - fnode->ea_offs, fnode->ea_size_s); + le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); return; } - if ((fnode->ea_size_s || !fnode->ea_size_l) && - fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s + strlen(key) + size + 5 <= 0x200) { + if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) && + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5 <= 0x200) { ea = fnode_end_ea(fnode); *(char *)ea = 0; ea->namelen = strlen(key); - ea->valuelen = size; + ea->valuelen_lo = size; + ea->valuelen_hi = size >> 8; strcpy(ea->name, key); memcpy(ea_data(ea), data, size); - fnode->ea_size_s += strlen(key) + size + 5; + fnode->ea_size_s = cpu_to_le16(le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5); goto ret; } /* Most the code here is 99.9993422% unused. I hope there are no bugs. But what .. HPFS.IFS has also bugs in ea management. */ - if (fnode->ea_size_s && !fnode->ea_size_l) { + if (le16_to_cpu(fnode->ea_size_s) && !le32_to_cpu(fnode->ea_size_l)) { secno n; struct buffer_head *bh; char *data; - if (!(n = hpfs_alloc_sector(s, fno, 1, 0, 1))) return; + if (!(n = hpfs_alloc_sector(s, fno, 1, 0))) return; if (!(data = hpfs_get_sector(s, n, &bh))) { hpfs_free_sectors(s, n, 1); return; } - memcpy(data, fnode_ea(fnode), fnode->ea_size_s); - fnode->ea_size_l = fnode->ea_size_s; - fnode->ea_size_s = 0; - fnode->ea_secno = n; - fnode->ea_anode = 0; + memcpy(data, fnode_ea(fnode), le16_to_cpu(fnode->ea_size_s)); + fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s)); + fnode->ea_size_s = cpu_to_le16(0); + fnode->ea_secno = cpu_to_le32(n); + fnode->ea_anode = cpu_to_le32(0); mark_buffer_dirty(bh); brelse(bh); } - pos = fnode->ea_size_l + 5 + strlen(key) + size; - len = (fnode->ea_size_l + 511) >> 9; + pos = le32_to_cpu(fnode->ea_size_l) + 5 + strlen(key) + size; + len = (le32_to_cpu(fnode->ea_size_l) + 511) >> 9; if (pos >= 30000) goto bail; while (((pos + 511) >> 9) > len) { if (!len) { - if (!(fnode->ea_secno = hpfs_alloc_sector(s, fno, 1, 0, 1))) - goto bail; + secno q = hpfs_alloc_sector(s, fno, 1, 0); + if (!q) goto bail; + fnode->ea_secno = cpu_to_le32(q); fnode->ea_anode = 0; len++; } else if (!fnode->ea_anode) { - if (hpfs_alloc_if_possible(s, fnode->ea_secno + len)) { + if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) { len++; } else { /* Aargh... don't know how to create ea anodes :-( */ @@ -298,26 +300,26 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, anode_secno a_s; if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh))) goto bail; - anode->up = fno; + anode->up = cpu_to_le32(fno); anode->btree.fnode_parent = 1; anode->btree.n_free_nodes--; anode->btree.n_used_nodes++; - anode->btree.first_free += 12; - anode->u.external[0].disk_secno = fnode->ea_secno; - anode->u.external[0].file_secno = 0; - anode->u.external[0].length = len; + anode->btree.first_free = cpu_to_le16(le16_to_cpu(anode->btree.first_free) + 12); + anode->u.external[0].disk_secno = cpu_to_le32(le32_to_cpu(fnode->ea_secno)); + anode->u.external[0].file_secno = cpu_to_le32(0); + anode->u.external[0].length = cpu_to_le32(len); mark_buffer_dirty(bh); brelse(bh); fnode->ea_anode = 1; - fnode->ea_secno = a_s;*/ + fnode->ea_secno = cpu_to_le32(a_s);*/ secno new_sec; int i; - if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9), 1))) + if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9)))) goto bail; for (i = 0; i < len; i++) { struct buffer_head *bh1, *bh2; void *b1, *b2; - if (!(b1 = hpfs_map_sector(s, fnode->ea_secno + i, &bh1, len - i - 1))) { + if (!(b1 = hpfs_map_sector(s, le32_to_cpu(fnode->ea_secno) + i, &bh1, len - i - 1))) { hpfs_free_sectors(s, new_sec, (pos + 511) >> 9); goto bail; } @@ -331,13 +333,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, mark_buffer_dirty(bh2); brelse(bh2); } - hpfs_free_sectors(s, fnode->ea_secno, len); - fnode->ea_secno = new_sec; + hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno), len); + fnode->ea_secno = cpu_to_le32(new_sec); len = (pos + 511) >> 9; } } if (fnode->ea_anode) { - if (hpfs_add_sector_to_btree(s, fnode->ea_secno, + if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno), 0, len) != -1) { len++; } else { @@ -349,17 +351,17 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, h[1] = strlen(key); h[2] = size & 0xff; h[3] = size >> 8; - if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l, 4, h)) goto bail; - if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 4, h[1] + 1, key)) goto bail; - if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 5 + h[1], size, data)) goto bail; - fnode->ea_size_l = pos; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; + fnode->ea_size_l = cpu_to_le32(pos); ret: hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; return; bail: - if (fnode->ea_secno) - if (fnode->ea_anode) hpfs_truncate_btree(s, fnode->ea_secno, 1, (fnode->ea_size_l + 511) >> 9); - else hpfs_free_sectors(s, fnode->ea_secno + ((fnode->ea_size_l + 511) >> 9), len - ((fnode->ea_size_l + 511) >> 9)); - else fnode->ea_secno = fnode->ea_size_l = 0; + if (le32_to_cpu(fnode->ea_secno)) + if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); + else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9)); + else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0); } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 9b9eb6933e43..89c500ee5213 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -20,8 +20,8 @@ static int hpfs_file_release(struct inode *inode, struct file *file) int hpfs_file_fsync(struct file *file, int datasync) { - /*return file_fsync(file, datasync);*/ - return 0; /* Don't fsync :-) */ + struct inode *inode = file->f_mapping->host; + return sync_blockdev(inode->i_sb->s_bdev); } /* @@ -48,38 +48,46 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno) static void hpfs_truncate(struct inode *i) { if (IS_IMMUTABLE(i)) return /*-EPERM*/; - hpfs_lock(i->i_sb); + hpfs_lock_assert(i->i_sb); + hpfs_i(i)->i_n_secs = 0; i->i_blocks = 1 + ((i->i_size + 511) >> 9); hpfs_i(i)->mmu_private = i->i_size; hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); hpfs_write_inode(i); hpfs_i(i)->i_n_secs = 0; - hpfs_unlock(i->i_sb); } static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + int r; secno s; + hpfs_lock(inode->i_sb); s = hpfs_bmap(inode, iblock); if (s) { map_bh(bh_result, inode->i_sb, s); - return 0; + goto ret_0; } - if (!create) return 0; + if (!create) goto ret_0; if (iblock<<9 != hpfs_i(inode)->mmu_private) { BUG(); - return -EIO; + r = -EIO; + goto ret_r; } if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) { hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1); - return -ENOSPC; + r = -ENOSPC; + goto ret_r; } inode->i_blocks++; hpfs_i(inode)->mmu_private += 512; set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, s); - return 0; + ret_0: + r = 0; + ret_r: + hpfs_unlock(inode->i_sb); + return r; } static int hpfs_writepage(struct page *page, struct writeback_control *wbc) @@ -130,8 +138,11 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf, ssize_t retval; retval = do_sync_write(file, buf, count, ppos); - if (retval > 0) + if (retval > 0) { + hpfs_lock(file->f_path.dentry->d_sb); hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1; + hpfs_unlock(file->f_path.dentry->d_sb); + } return retval; } diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 0e84c73cd9c4..8b0650aae328 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -19,9 +19,13 @@ For definitive information on HPFS, ask somebody else -- this is guesswork. There are certain to be many mistakes. */ +#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN) +#error unknown endian +#endif + /* Notation */ -typedef unsigned secno; /* sector number, partition relative */ +typedef u32 secno; /* sector number, partition relative */ typedef secno dnode_secno; /* sector number of a dnode */ typedef secno fnode_secno; /* sector number of an fnode */ @@ -38,28 +42,28 @@ typedef u32 time32_t; /* 32-bit time_t type */ struct hpfs_boot_block { - unsigned char jmp[3]; - unsigned char oem_id[8]; - unsigned char bytes_per_sector[2]; /* 512 */ - unsigned char sectors_per_cluster; - unsigned char n_reserved_sectors[2]; - unsigned char n_fats; - unsigned char n_rootdir_entries[2]; - unsigned char n_sectors_s[2]; - unsigned char media_byte; - unsigned short sectors_per_fat; - unsigned short sectors_per_track; - unsigned short heads_per_cyl; - unsigned int n_hidden_sectors; - unsigned int n_sectors_l; /* size of partition */ - unsigned char drive_number; - unsigned char mbz; - unsigned char sig_28h; /* 28h */ - unsigned char vol_serno[4]; - unsigned char vol_label[11]; - unsigned char sig_hpfs[8]; /* "HPFS " */ - unsigned char pad[448]; - unsigned short magic; /* aa55 */ + u8 jmp[3]; + u8 oem_id[8]; + u8 bytes_per_sector[2]; /* 512 */ + u8 sectors_per_cluster; + u8 n_reserved_sectors[2]; + u8 n_fats; + u8 n_rootdir_entries[2]; + u8 n_sectors_s[2]; + u8 media_byte; + u16 sectors_per_fat; + u16 sectors_per_track; + u16 heads_per_cyl; + u32 n_hidden_sectors; + u32 n_sectors_l; /* size of partition */ + u8 drive_number; + u8 mbz; + u8 sig_28h; /* 28h */ + u8 vol_serno[4]; + u8 vol_label[11]; + u8 sig_hpfs[8]; /* "HPFS " */ + u8 pad[448]; + u16 magic; /* aa55 */ }; @@ -71,31 +75,29 @@ struct hpfs_boot_block struct hpfs_super_block { - unsigned magic; /* f995 e849 */ - unsigned magic1; /* fa53 e9c5, more magic? */ - /*unsigned huh202;*/ /* ?? 202 = N. of B. in 1.00390625 S.*/ - char version; /* version of a filesystem usually 2 */ - char funcversion; /* functional version - oldest version + u32 magic; /* f995 e849 */ + u32 magic1; /* fa53 e9c5, more magic? */ + u8 version; /* version of a filesystem usually 2 */ + u8 funcversion; /* functional version - oldest version of filesystem that can understand this disk */ - unsigned short int zero; /* 0 */ + u16 zero; /* 0 */ fnode_secno root; /* fnode of root directory */ secno n_sectors; /* size of filesystem */ - unsigned n_badblocks; /* number of bad blocks */ + u32 n_badblocks; /* number of bad blocks */ secno bitmaps; /* pointers to free space bit maps */ - unsigned zero1; /* 0 */ + u32 zero1; /* 0 */ secno badblocks; /* bad block list */ - unsigned zero3; /* 0 */ + u32 zero3; /* 0 */ time32_t last_chkdsk; /* date last checked, 0 if never */ - /*unsigned zero4;*/ /* 0 */ - time32_t last_optimize; /* date last optimized, 0 if never */ + time32_t last_optimize; /* date last optimized, 0 if never */ secno n_dir_band; /* number of sectors in dir band */ secno dir_band_start; /* first sector in dir band */ secno dir_band_end; /* last sector in dir band */ secno dir_band_bitmap; /* free space map, 1 dnode per bit */ - char volume_name[32]; /* not used */ + u8 volume_name[32]; /* not used */ secno user_id_table; /* 8 preallocated sectors - user id */ - unsigned zero6[103]; /* 0 */ + u32 zero6[103]; /* 0 */ }; @@ -107,44 +109,65 @@ struct hpfs_super_block struct hpfs_spare_block { - unsigned magic; /* f991 1849 */ - unsigned magic1; /* fa52 29c5, more magic? */ - - unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */ - /*unsigned flag1234: 4;*/ /* unknown flags */ - unsigned sparedir_used: 1; /* spare dirblks used */ - unsigned hotfixes_used: 1; /* hotfixes used */ - unsigned bad_sector: 1; /* bad sector, corrupted disk (???) */ - unsigned bad_bitmap: 1; /* bad bitmap */ - unsigned fast: 1; /* partition was fast formatted */ - unsigned old_wrote: 1; /* old version wrote to partion */ - unsigned old_wrote_1: 1; /* old version wrote to partion (?) */ - unsigned install_dasd_limits: 1; /* HPFS386 flags */ - unsigned resynch_dasd_limits: 1; - unsigned dasd_limits_operational: 1; - unsigned multimedia_active: 1; - unsigned dce_acls_active: 1; - unsigned dasd_limits_dirty: 1; - unsigned flag67: 2; - unsigned char mm_contlgulty; - unsigned char unused; + u32 magic; /* f991 1849 */ + u32 magic1; /* fa52 29c5, more magic? */ + +#ifdef __LITTLE_ENDIAN + u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ + u8 sparedir_used: 1; /* spare dirblks used */ + u8 hotfixes_used: 1; /* hotfixes used */ + u8 bad_sector: 1; /* bad sector, corrupted disk (???) */ + u8 bad_bitmap: 1; /* bad bitmap */ + u8 fast: 1; /* partition was fast formatted */ + u8 old_wrote: 1; /* old version wrote to partion */ + u8 old_wrote_1: 1; /* old version wrote to partion (?) */ +#else + u8 old_wrote_1: 1; /* old version wrote to partion (?) */ + u8 old_wrote: 1; /* old version wrote to partion */ + u8 fast: 1; /* partition was fast formatted */ + u8 bad_bitmap: 1; /* bad bitmap */ + u8 bad_sector: 1; /* bad sector, corrupted disk (???) */ + u8 hotfixes_used: 1; /* hotfixes used */ + u8 sparedir_used: 1; /* spare dirblks used */ + u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ +#endif + +#ifdef __LITTLE_ENDIAN + u8 install_dasd_limits: 1; /* HPFS386 flags */ + u8 resynch_dasd_limits: 1; + u8 dasd_limits_operational: 1; + u8 multimedia_active: 1; + u8 dce_acls_active: 1; + u8 dasd_limits_dirty: 1; + u8 flag67: 2; +#else + u8 flag67: 2; + u8 dasd_limits_dirty: 1; + u8 dce_acls_active: 1; + u8 multimedia_active: 1; + u8 dasd_limits_operational: 1; + u8 resynch_dasd_limits: 1; + u8 install_dasd_limits: 1; /* HPFS386 flags */ +#endif + + u8 mm_contlgulty; + u8 unused; secno hotfix_map; /* info about remapped bad sectors */ - unsigned n_spares_used; /* number of hotfixes */ - unsigned n_spares; /* number of spares in hotfix map */ - unsigned n_dnode_spares_free; /* spare dnodes unused */ - unsigned n_dnode_spares; /* length of spare_dnodes[] list, + u32 n_spares_used; /* number of hotfixes */ + u32 n_spares; /* number of spares in hotfix map */ + u32 n_dnode_spares_free; /* spare dnodes unused */ + u32 n_dnode_spares; /* length of spare_dnodes[] list, follows in this block*/ secno code_page_dir; /* code page directory block */ - unsigned n_code_pages; /* number of code pages */ - /*unsigned large_numbers[2];*/ /* ?? */ - unsigned super_crc; /* on HPFS386 and LAN Server this is + u32 n_code_pages; /* number of code pages */ + u32 super_crc; /* on HPFS386 and LAN Server this is checksum of superblock, on normal OS/2 unused */ - unsigned spare_crc; /* on HPFS386 checksum of spareblock */ - unsigned zero1[15]; /* unused */ + u32 spare_crc; /* on HPFS386 checksum of spareblock */ + u32 zero1[15]; /* unused */ dnode_secno spare_dnodes[100]; /* emergency free dnode list */ - unsigned zero2[1]; /* room for more? */ + u32 zero2[1]; /* room for more? */ }; /* The bad block list is 4 sectors long. The first word must be zero, @@ -179,18 +202,18 @@ struct hpfs_spare_block struct code_page_directory { - unsigned magic; /* 4945 21f7 */ - unsigned n_code_pages; /* number of pointers following */ - unsigned zero1[2]; + u32 magic; /* 4945 21f7 */ + u32 n_code_pages; /* number of pointers following */ + u32 zero1[2]; struct { - unsigned short ix; /* index */ - unsigned short code_page_number; /* code page number */ - unsigned bounds; /* matches corresponding word + u16 ix; /* index */ + u16 code_page_number; /* code page number */ + u32 bounds; /* matches corresponding word in data block */ secno code_page_data; /* sector number of a code_page_data containing c.p. array */ - unsigned short index; /* index in c.p. array in that sector*/ - unsigned short unknown; /* some unknown value; usually 0; + u16 index; /* index in c.p. array in that sector*/ + u16 unknown; /* some unknown value; usually 0; 2 in Japanese version */ } array[31]; /* unknown length */ }; @@ -201,21 +224,21 @@ struct code_page_directory struct code_page_data { - unsigned magic; /* 8945 21f7 */ - unsigned n_used; /* # elements used in c_p_data[] */ - unsigned bounds[3]; /* looks a bit like + u32 magic; /* 8945 21f7 */ + u32 n_used; /* # elements used in c_p_data[] */ + u32 bounds[3]; /* looks a bit like (beg1,end1), (beg2,end2) one byte each */ - unsigned short offs[3]; /* offsets from start of sector + u16 offs[3]; /* offsets from start of sector to start of c_p_data[ix] */ struct { - unsigned short ix; /* index */ - unsigned short code_page_number; /* code page number */ - unsigned short unknown; /* the same as in cp directory */ - unsigned char map[128]; /* upcase table for chars 80..ff */ - unsigned short zero2; + u16 ix; /* index */ + u16 code_page_number; /* code page number */ + u16 unknown; /* the same as in cp directory */ + u8 map[128]; /* upcase table for chars 80..ff */ + u16 zero2; } code_page[3]; - unsigned char incognita[78]; + u8 incognita[78]; }; @@ -255,50 +278,84 @@ struct code_page_data #define DNODE_MAGIC 0x77e40aae struct dnode { - unsigned magic; /* 77e4 0aae */ - unsigned first_free; /* offset from start of dnode to + u32 magic; /* 77e4 0aae */ + u32 first_free; /* offset from start of dnode to first free dir entry */ - unsigned root_dnode:1; /* Is it root dnode? */ - unsigned increment_me:31; /* some kind of activity counter? - Neither HPFS.IFS nor CHKDSK cares +#ifdef __LITTLE_ENDIAN + u8 root_dnode: 1; /* Is it root dnode? */ + u8 increment_me: 7; /* some kind of activity counter? */ + /* Neither HPFS.IFS nor CHKDSK cares + if you change this word */ +#else + u8 increment_me: 7; /* some kind of activity counter? */ + /* Neither HPFS.IFS nor CHKDSK cares if you change this word */ + u8 root_dnode: 1; /* Is it root dnode? */ +#endif + u8 increment_me2[3]; secno up; /* (root dnode) directory's fnode (nonroot) parent dnode */ dnode_secno self; /* pointer to this dnode */ - unsigned char dirent[2028]; /* one or more dirents */ + u8 dirent[2028]; /* one or more dirents */ }; struct hpfs_dirent { - unsigned short length; /* offset to next dirent */ - unsigned first: 1; /* set on phony ^A^A (".") entry */ - unsigned has_acl: 1; - unsigned down: 1; /* down pointer present (after name) */ - unsigned last: 1; /* set on phony \377 entry */ - unsigned has_ea: 1; /* entry has EA */ - unsigned has_xtd_perm: 1; /* has extended perm list (???) */ - unsigned has_explicit_acl: 1; - unsigned has_needea: 1; /* ?? some EA has NEEDEA set + u16 length; /* offset to next dirent */ + +#ifdef __LITTLE_ENDIAN + u8 first: 1; /* set on phony ^A^A (".") entry */ + u8 has_acl: 1; + u8 down: 1; /* down pointer present (after name) */ + u8 last: 1; /* set on phony \377 entry */ + u8 has_ea: 1; /* entry has EA */ + u8 has_xtd_perm: 1; /* has extended perm list (???) */ + u8 has_explicit_acl: 1; + u8 has_needea: 1; /* ?? some EA has NEEDEA set + I have no idea why this is + interesting in a dir entry */ +#else + u8 has_needea: 1; /* ?? some EA has NEEDEA set I have no idea why this is interesting in a dir entry */ - unsigned read_only: 1; /* dos attrib */ - unsigned hidden: 1; /* dos attrib */ - unsigned system: 1; /* dos attrib */ - unsigned flag11: 1; /* would be volume label dos attrib */ - unsigned directory: 1; /* dos attrib */ - unsigned archive: 1; /* dos attrib */ - unsigned not_8x3: 1; /* name is not 8.3 */ - unsigned flag15: 1; + u8 has_explicit_acl: 1; + u8 has_xtd_perm: 1; /* has extended perm list (???) */ + u8 has_ea: 1; /* entry has EA */ + u8 last: 1; /* set on phony \377 entry */ + u8 down: 1; /* down pointer present (after name) */ + u8 has_acl: 1; + u8 first: 1; /* set on phony ^A^A (".") entry */ +#endif + +#ifdef __LITTLE_ENDIAN + u8 read_only: 1; /* dos attrib */ + u8 hidden: 1; /* dos attrib */ + u8 system: 1; /* dos attrib */ + u8 flag11: 1; /* would be volume label dos attrib */ + u8 directory: 1; /* dos attrib */ + u8 archive: 1; /* dos attrib */ + u8 not_8x3: 1; /* name is not 8.3 */ + u8 flag15: 1; +#else + u8 flag15: 1; + u8 not_8x3: 1; /* name is not 8.3 */ + u8 archive: 1; /* dos attrib */ + u8 directory: 1; /* dos attrib */ + u8 flag11: 1; /* would be volume label dos attrib */ + u8 system: 1; /* dos attrib */ + u8 hidden: 1; /* dos attrib */ + u8 read_only: 1; /* dos attrib */ +#endif + fnode_secno fnode; /* fnode giving allocation info */ time32_t write_date; /* mtime */ - unsigned file_size; /* file length, bytes */ + u32 file_size; /* file length, bytes */ time32_t read_date; /* atime */ time32_t creation_date; /* ctime */ - unsigned ea_size; /* total EA length, bytes */ - unsigned char no_of_acls : 3; /* number of ACL's */ - unsigned char reserver : 5; - unsigned char ix; /* code page index (of filename), see + u32 ea_size; /* total EA length, bytes */ + u8 no_of_acls; /* number of ACL's (low 3 bits) */ + u8 ix; /* code page index (of filename), see struct code_page_data */ - unsigned char namelen, name[1]; /* file name */ + u8 namelen, name[1]; /* file name */ /* dnode_secno down; btree down pointer, if present, follows name on next word boundary, or maybe it precedes next dirent, which is on a word boundary. */ @@ -318,38 +375,50 @@ struct hpfs_dirent { struct bplus_leaf_node { - unsigned file_secno; /* first file sector in extent */ - unsigned length; /* length, sectors */ + u32 file_secno; /* first file sector in extent */ + u32 length; /* length, sectors */ secno disk_secno; /* first corresponding disk sector */ }; struct bplus_internal_node { - unsigned file_secno; /* subtree maps sectors < this */ + u32 file_secno; /* subtree maps sectors < this */ anode_secno down; /* pointer to subtree */ }; struct bplus_header { - unsigned hbff: 1; /* high bit of first free entry offset */ - unsigned flag1: 1; - unsigned flag2: 1; - unsigned flag3: 1; - unsigned flag4: 1; - unsigned fnode_parent: 1; /* ? we're pointed to by an fnode, +#ifdef __LITTLE_ENDIAN + u8 hbff: 1; /* high bit of first free entry offset */ + u8 flag1234: 4; + u8 fnode_parent: 1; /* ? we're pointed to by an fnode, the data btree or some ea or the main ea bootage pointer ea_secno */ /* also can get set in fnodes, which may be a chkdsk glitch or may mean this bit is irrelevant in fnodes, or this interpretation is all wet */ - unsigned binary_search: 1; /* suggest binary search (unused) */ - unsigned internal: 1; /* 1 -> (internal) tree of anodes + u8 binary_search: 1; /* suggest binary search (unused) */ + u8 internal: 1; /* 1 -> (internal) tree of anodes + 0 -> (leaf) list of extents */ +#else + u8 internal: 1; /* 1 -> (internal) tree of anodes 0 -> (leaf) list of extents */ - unsigned char fill[3]; - unsigned char n_free_nodes; /* free nodes in following array */ - unsigned char n_used_nodes; /* used nodes in following array */ - unsigned short first_free; /* offset from start of header to + u8 binary_search: 1; /* suggest binary search (unused) */ + u8 fnode_parent: 1; /* ? we're pointed to by an fnode, + the data btree or some ea or the + main ea bootage pointer ea_secno */ + /* also can get set in fnodes, which + may be a chkdsk glitch or may mean + this bit is irrelevant in fnodes, + or this interpretation is all wet */ + u8 flag1234: 4; + u8 hbff: 1; /* high bit of first free entry offset */ +#endif + u8 fill[3]; + u8 n_free_nodes; /* free nodes in following array */ + u8 n_used_nodes; /* used nodes in following array */ + u16 first_free; /* offset from start of header to first free node in array */ union { struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving @@ -369,37 +438,38 @@ struct bplus_header struct fnode { - unsigned magic; /* f7e4 0aae */ - unsigned zero1[2]; /* read history */ - unsigned char len, name[15]; /* true length, truncated name */ + u32 magic; /* f7e4 0aae */ + u32 zero1[2]; /* read history */ + u8 len, name[15]; /* true length, truncated name */ fnode_secno up; /* pointer to file's directory fnode */ - /*unsigned zero2[3];*/ secno acl_size_l; secno acl_secno; - unsigned short acl_size_s; - char acl_anode; - char zero2; /* history bit count */ - unsigned ea_size_l; /* length of disk-resident ea's */ + u16 acl_size_s; + u8 acl_anode; + u8 zero2; /* history bit count */ + u32 ea_size_l; /* length of disk-resident ea's */ secno ea_secno; /* first sector of disk-resident ea's*/ - unsigned short ea_size_s; /* length of fnode-resident ea's */ - - unsigned flag0: 1; - unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */ - unsigned flag2: 1; - unsigned flag3: 1; - unsigned flag4: 1; - unsigned flag5: 1; - unsigned flag6: 1; - unsigned flag7: 1; - unsigned dirflag: 1; /* 1 -> directory. first & only extent + u16 ea_size_s; /* length of fnode-resident ea's */ + +#ifdef __LITTLE_ENDIAN + u8 flag0: 1; + u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ + u8 flag234567: 6; +#else + u8 flag234567: 6; + u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ + u8 flag0: 1; +#endif + +#ifdef __LITTLE_ENDIAN + u8 dirflag: 1; /* 1 -> directory. first & only extent points to dnode. */ - unsigned flag9: 1; - unsigned flag10: 1; - unsigned flag11: 1; - unsigned flag12: 1; - unsigned flag13: 1; - unsigned flag14: 1; - unsigned flag15: 1; + u8 flag9012345: 7; +#else + u8 flag9012345: 7; + u8 dirflag: 1; /* 1 -> directory. first & only extent + points to dnode. */ +#endif struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ union { @@ -407,17 +477,16 @@ struct fnode struct bplus_internal_node internal[12]; } u; - unsigned file_size; /* file length, bytes */ - unsigned n_needea; /* number of EA's with NEEDEA set */ - char user_id[16]; /* unused */ - unsigned short ea_offs; /* offset from start of fnode + u32 file_size; /* file length, bytes */ + u32 n_needea; /* number of EA's with NEEDEA set */ + u8 user_id[16]; /* unused */ + u16 ea_offs; /* offset from start of fnode to first fnode-resident ea */ - char dasd_limit_treshhold; - char dasd_limit_delta; - unsigned dasd_limit; - unsigned dasd_usage; - /*unsigned zero5[2];*/ - unsigned char ea[316]; /* zero or more EA's, packed together + u8 dasd_limit_treshhold; + u8 dasd_limit_delta; + u32 dasd_limit; + u32 dasd_usage; + u8 ea[316]; /* zero or more EA's, packed together with no alignment padding. (Do not use this name, get here via fnode + ea_offs. I think.) */ @@ -430,7 +499,7 @@ struct fnode struct anode { - unsigned magic; /* 37e4 0aae */ + u32 magic; /* 37e4 0aae */ anode_secno self; /* pointer to this anode */ secno up; /* parent anode or fnode */ @@ -440,7 +509,7 @@ struct anode struct bplus_internal_node internal[60]; } u; - unsigned fill[3]; /* unused */ + u32 fill[3]; /* unused */ }; @@ -461,25 +530,31 @@ struct anode struct extended_attribute { - unsigned indirect: 1; /* 1 -> value gives sector number +#ifdef __LITTLE_ENDIAN + u8 indirect: 1; /* 1 -> value gives sector number where real value starts */ - unsigned anode: 1; /* 1 -> sector is an anode + u8 anode: 1; /* 1 -> sector is an anode + that points to fragmented value */ + u8 flag23456: 5; + u8 needea: 1; /* required ea */ +#else + u8 needea: 1; /* required ea */ + u8 flag23456: 5; + u8 anode: 1; /* 1 -> sector is an anode that points to fragmented value */ - unsigned flag2: 1; - unsigned flag3: 1; - unsigned flag4: 1; - unsigned flag5: 1; - unsigned flag6: 1; - unsigned needea: 1; /* required ea */ - unsigned char namelen; /* length of name, bytes */ - unsigned short valuelen; /* length of value, bytes */ - unsigned char name[0]; + u8 indirect: 1; /* 1 -> value gives sector number + where real value starts */ +#endif + u8 namelen; /* length of name, bytes */ + u8 valuelen_lo; /* length of value, bytes */ + u8 valuelen_hi; /* length of value, bytes */ + u8 name[0]; /* - unsigned char name[namelen]; ascii attrib name - unsigned char nul; terminating '\0', not counted - unsigned char value[valuelen]; value, arbitrary + u8 name[namelen]; ascii attrib name + u8 nul; terminating '\0', not counted + u8 value[valuelen]; value, arbitrary if this.indirect, valuelen is 8 and the value is - unsigned length; real length of value, bytes + u32 length; real length of value, bytes secno secno; sector address where it starts if this.anode, the above sector number is the root of an anode tree which points to the value. diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c15adbca07ff..dd552f862c8f 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -13,6 +13,7 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/slab.h> +#include <asm/unaligned.h> #include "hpfs.h" @@ -51,18 +52,16 @@ struct hpfs_inode_info { unsigned i_disk_sec; /* (files) minimalist cache of alloc info */ unsigned i_n_secs; /* (files) minimalist cache of alloc info */ unsigned i_ea_size; /* size of extended attributes */ - unsigned i_conv : 2; /* (files) crlf->newline hackery */ unsigned i_ea_mode : 1; /* file's permission is stored in ea */ unsigned i_ea_uid : 1; /* file's uid is stored in ea */ unsigned i_ea_gid : 1; /* file's gid is stored in ea */ unsigned i_dirty : 1; - struct mutex i_mutex; - struct mutex i_parent_mutex; loff_t **i_rddir_off; struct inode vfs_inode; }; struct hpfs_sb_info { + struct mutex hpfs_mutex; /* global hpfs lock */ ino_t sb_root; /* inode number of root dir */ unsigned sb_fs_size; /* file system size, sectors */ unsigned sb_bitmaps; /* sector number of bitmap list */ @@ -74,7 +73,6 @@ struct hpfs_sb_info { uid_t sb_uid; /* uid from mount options */ gid_t sb_gid; /* gid from mount options */ umode_t sb_mode; /* mode from mount options */ - unsigned sb_conv : 2; /* crlf->newline hackery */ unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ unsigned sb_chk : 2; /* checks: 0-no, 1-normal, 2-strict */ @@ -87,20 +85,9 @@ struct hpfs_sb_info { unsigned *sb_bmp_dir; /* main bitmap directory */ unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ - struct mutex hpfs_creation_de; /* when creating dirents, nobody else - can alloc blocks */ - /*unsigned sb_mounting : 1;*/ int sb_timeshift; }; -/* - * conv= options - */ - -#define CONV_BINARY 0 /* no conversion */ -#define CONV_TEXT 1 /* crlf->newline */ -#define CONV_AUTO 2 /* decide based on file contents */ - /* Four 512-byte buffers and the 2k block obtained by concatenating them */ struct quad_buffer_head { @@ -113,7 +100,7 @@ struct quad_buffer_head { static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) { CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); - return *(dnode_secno *) ((void *) de + de->length - 4); + return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4)); } /* The first dir entry in a dnode */ @@ -127,41 +114,46 @@ static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode) static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode) { - CHKCOND(dnode->first_free>=0x14 && dnode->first_free<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %d\n",(int)dnode->first_free)); - return (void *) dnode + dnode->first_free; + CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free))); + return (void *) dnode + le32_to_cpu(dnode->first_free); } /* The dir entry after dir entry de */ static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de) { - CHKCOND(de->length>=0x20 && de->length<0x800,("HPFS: de_next_de: de->length = %d\n",(int)de->length)); - return (void *) de + de->length; + CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length))); + return (void *) de + le16_to_cpu(de->length); } static inline struct extended_attribute *fnode_ea(struct fnode *fnode) { - return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s); + return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s)); } static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode) { - return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s); + return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s)); +} + +static unsigned ea_valuelen(struct extended_attribute *ea) +{ + return ea->valuelen_lo + 256 * ea->valuelen_hi; } static inline struct extended_attribute *next_ea(struct extended_attribute *ea) { - return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea->valuelen); + return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea)); } static inline secno ea_sec(struct extended_attribute *ea) { - return *(secno *)((char *)ea + 9 + ea->namelen); + return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen))); } static inline secno ea_len(struct extended_attribute *ea) { - return *(secno *)((char *)ea + 5 + ea->namelen); + return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen))); } static inline char *ea_data(struct extended_attribute *ea) @@ -186,13 +178,13 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src) dst->not_8x3 = n; } -static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n) +static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n) { int i; if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; - if (!((bmp[(b & 0x3fff) >> 5] >> (b & 0x1f)) & 1)) return 1; + if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1; for (i = 1; i < n; i++) - if (/*b+i < 0x4000 &&*/ !((bmp[((b+i) & 0x3fff) >> 5] >> ((b+i) & 0x1f)) & 1)) + if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1)) return i + 1; return 0; } @@ -200,12 +192,12 @@ static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n) /* alloc.c */ int hpfs_chk_sectors(struct super_block *, secno, int, char *); -secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int, int); +secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int); int hpfs_alloc_if_possible(struct super_block *, secno); void hpfs_free_sectors(struct super_block *, secno, unsigned); int hpfs_check_free_dnodes(struct super_block *, int); void hpfs_free_dnode(struct super_block *, secno); -struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *, int); +struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); @@ -222,8 +214,6 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno); /* buffer.c */ -void hpfs_lock_creation(struct super_block *); -void hpfs_unlock_creation(struct super_block *); void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); @@ -247,7 +237,7 @@ void hpfs_del_pos(struct inode *, loff_t *); struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, const unsigned char *, unsigned, secno); int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned, - struct hpfs_dirent *, int); + struct hpfs_dirent *); int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int); void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *); dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno); @@ -303,7 +293,6 @@ int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned, const unsigned char *, unsigned, int); int hpfs_is_name_long(const unsigned char *, unsigned); void hpfs_adjust_length(const unsigned char *, unsigned *); -void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned); /* namei.c */ @@ -346,21 +335,26 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t) /* * Locking: * - * hpfs_lock() is a leftover from the big kernel lock. - * Right now, these functions are empty and only left - * for documentation purposes. The file system no longer - * works on SMP systems, so the lock is not needed - * any more. + * hpfs_lock() locks the whole filesystem. It must be taken + * on any method called by the VFS. * - * If someone is interested in making it work again, this - * would be the place to start by adding a per-superblock - * mutex and fixing all the bugs and performance issues - * caused by that. + * We don't do any per-file locking anymore, it is hard to + * review and HPFS is not performance-sensitive anyway. */ static inline void hpfs_lock(struct super_block *s) { + struct hpfs_sb_info *sbi = hpfs_sb(s); + mutex_lock(&sbi->hpfs_mutex); } static inline void hpfs_unlock(struct super_block *s) { + struct hpfs_sb_info *sbi = hpfs_sb(s); + mutex_unlock(&sbi->hpfs_mutex); +} + +static inline void hpfs_lock_assert(struct super_block *s) +{ + struct hpfs_sb_info *sbi = hpfs_sb(s); + WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex)); } diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 87f1f787e767..338cd8368451 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i) i->i_uid = hpfs_sb(sb)->sb_uid; i->i_gid = hpfs_sb(sb)->sb_gid; i->i_mode = hpfs_sb(sb)->sb_mode; - hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv; i->i_size = -1; i->i_blocks = -1; @@ -116,8 +115,8 @@ void hpfs_read_inode(struct inode *i) i->i_mode |= S_IFDIR; i->i_op = &hpfs_dir_iops; i->i_fop = &hpfs_dir_ops; - hpfs_inode->i_parent_dir = fnode->up; - hpfs_inode->i_dno = fnode->u.external[0].disk_secno; + hpfs_inode->i_parent_dir = le32_to_cpu(fnode->up); + hpfs_inode->i_dno = le32_to_cpu(fnode->u.external[0].disk_secno); if (hpfs_sb(sb)->sb_chk >= 2) { struct buffer_head *bh0; if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0); @@ -133,7 +132,7 @@ void hpfs_read_inode(struct inode *i) i->i_op = &hpfs_file_iops; i->i_fop = &hpfs_file_ops; i->i_nlink = 1; - i->i_size = fnode->file_size; + i->i_size = le32_to_cpu(fnode->file_size); i->i_blocks = ((i->i_size + 511) >> 9) + 1; i->i_data.a_ops = &hpfs_aops; hpfs_i(i)->mmu_private = i->i_size; @@ -144,7 +143,7 @@ void hpfs_read_inode(struct inode *i) static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) { struct hpfs_inode_info *hpfs_inode = hpfs_i(i); - /*if (fnode->acl_size_l || fnode->acl_size_s) { + /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) { Some unknown structures like ACL may be in fnode, we'd better not overwrite them hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); @@ -187,9 +186,7 @@ void hpfs_write_inode(struct inode *i) kfree(hpfs_inode->i_rddir_off); hpfs_inode->i_rddir_off = NULL; } - mutex_lock(&hpfs_inode->i_parent_mutex); if (!i->i_nlink) { - mutex_unlock(&hpfs_inode->i_parent_mutex); return; } parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir); @@ -200,14 +197,9 @@ void hpfs_write_inode(struct inode *i) hpfs_read_inode(parent); unlock_new_inode(parent); } - mutex_lock(&hpfs_inode->i_mutex); hpfs_write_inode_nolock(i); - mutex_unlock(&hpfs_inode->i_mutex); iput(parent); - } else { - mark_inode_dirty(i); } - mutex_unlock(&hpfs_inode->i_parent_mutex); } void hpfs_write_inode_nolock(struct inode *i) @@ -226,30 +218,30 @@ void hpfs_write_inode_nolock(struct inode *i) } } else de = NULL; if (S_ISREG(i->i_mode)) { - fnode->file_size = i->i_size; - if (de) de->file_size = i->i_size; + fnode->file_size = cpu_to_le32(i->i_size); + if (de) de->file_size = cpu_to_le32(i->i_size); } else if (S_ISDIR(i->i_mode)) { - fnode->file_size = 0; - if (de) de->file_size = 0; + fnode->file_size = cpu_to_le32(0); + if (de) de->file_size = cpu_to_le32(0); } hpfs_write_inode_ea(i, fnode); if (de) { - de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec); - de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec); - de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec); + de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec)); + de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec)); + de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec)); de->read_only = !(i->i_mode & 0222); - de->ea_size = hpfs_inode->i_ea_size; + de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); } if (S_ISDIR(i->i_mode)) { if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) { - de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec); - de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec); - de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec); + de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec)); + de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec)); + de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec)); de->read_only = !(i->i_mode & 0222); - de->ea_size = /*hpfs_inode->i_ea_size*/0; - de->file_size = 0; + de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0); + de->file_size = cpu_to_le32(0); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); } else @@ -269,6 +261,10 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) hpfs_lock(inode->i_sb); if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) goto out_unlock; + if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000) + goto out_unlock; + if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000) + goto out_unlock; if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) goto out_unlock; @@ -284,7 +280,6 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) } setattr_copy(inode, attr); - mark_inode_dirty(inode); hpfs_write_inode(inode); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 840d033ecee8..a790821366a7 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -21,7 +21,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } - sec = hpfs_sb(s)->sb_bmp_dir[bmp_block]; + sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]); if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) { hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); return NULL; @@ -46,18 +46,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) struct code_page_data *cpd; struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0); if (!cp) return NULL; - if (cp->magic != CP_DIR_MAGIC) { - printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", cp->magic); + if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) { + printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic)); brelse(bh); return NULL; } - if (!cp->n_code_pages) { + if (!le32_to_cpu(cp->n_code_pages)) { printk("HPFS: n_code_pages == 0\n"); brelse(bh); return NULL; } - cpds = cp->array[0].code_page_data; - cpi = cp->array[0].index; + cpds = le32_to_cpu(cp->array[0].code_page_data); + cpi = le16_to_cpu(cp->array[0].index); brelse(bh); if (cpi >= 3) { @@ -66,12 +66,12 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) } if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL; - if ((unsigned)cpd->offs[cpi] > 0x178) { + if (le16_to_cpu(cpd->offs[cpi]) > 0x178) { printk("HPFS: Code page index out of sector\n"); brelse(bh); return NULL; } - ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6; + ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6; if (!(cp_table = kmalloc(256, GFP_KERNEL))) { printk("HPFS: out of memory for code page table\n"); brelse(bh); @@ -125,7 +125,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea if (hpfs_sb(s)->sb_chk) { struct extended_attribute *ea; struct extended_attribute *ea_end; - if (fnode->magic != FNODE_MAGIC) { + if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) { hpfs_error(s, "bad magic on fnode %08lx", (unsigned long)ino); goto bail; @@ -138,7 +138,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea (unsigned long)ino); goto bail; } - if (fnode->btree.first_free != + if (le16_to_cpu(fnode->btree.first_free) != 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", @@ -146,12 +146,12 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea goto bail; } } - if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 || - (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) { + if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 || + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) { hpfs_error(s, "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x", (unsigned long)ino, - fnode->ea_offs, fnode->ea_size_s); + le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); goto bail; } ea = fnode_ea(fnode); @@ -178,16 +178,20 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL; if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD))) if (hpfs_sb(s)->sb_chk) { - if (anode->magic != ANODE_MAGIC || anode->self != ano) { + if (le32_to_cpu(anode->magic) != ANODE_MAGIC) { hpfs_error(s, "bad magic on anode %08x", ano); goto bail; } + if (le32_to_cpu(anode->self) != ano) { + hpfs_error(s, "self pointer invalid on anode %08x", ano); + goto bail; + } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != (anode->btree.internal ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } - if (anode->btree.first_free != + if (le16_to_cpu(anode->btree.first_free) != 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; @@ -219,26 +223,26 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, unsigned p, pp = 0; unsigned char *d = (unsigned char *)dnode; int b = 0; - if (dnode->magic != DNODE_MAGIC) { + if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) { hpfs_error(s, "bad magic on dnode %08x", secno); goto bail; } - if (dnode->self != secno) - hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, dnode->self); + if (le32_to_cpu(dnode->self) != secno) + hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self)); /* Check dirents - bad dirents would cause infinite loops or shooting to memory */ - if (dnode->first_free > 2048/* || dnode->first_free < 84*/) { - hpfs_error(s, "dnode %08x has first_free == %08x", secno, dnode->first_free); + if (le32_to_cpu(dnode->first_free) > 2048) { + hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free)); goto bail; } - for (p = 20; p < dnode->first_free; p += d[p] + (d[p+1] << 8)) { + for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) { struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p); - if (de->length > 292 || (de->length < 32) || (de->length & 3) || p + de->length > 2048) { + if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) { hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } - if (((31 + de->namelen + de->down*4 + 3) & ~3) != de->length) { - if (((31 + de->namelen + de->down*4 + 3) & ~3) < de->length && s->s_flags & MS_RDONLY) goto ok; + if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) { + if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok; hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } @@ -251,7 +255,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, pp = p; } - if (p != dnode->first_free) { + if (p != le32_to_cpu(dnode->first_free)) { hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno); goto bail; } @@ -277,7 +281,7 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino) if (!fnode) return 0; - dno = fnode->u.external[0].disk_secno; + dno = le32_to_cpu(fnode->u.external[0].disk_secno); brelse(bh); return dno; } diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c index f24736d7a439..9acdf338def0 100644 --- a/fs/hpfs/name.c +++ b/fs/hpfs/name.c @@ -8,39 +8,6 @@ #include "hpfs_fn.h" -static const char *text_postfix[]={ -".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF", -".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS", -".RC", ".TEX", ".TXT", ".Y", ""}; - -static const char *text_prefix[]={ -"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ", -"MAKEFILE", "READ.ME", "README", "TERMCAP", ""}; - -void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len) -{ - struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); - int i; - if (hpfs_inode->i_conv != CONV_AUTO) return; - for (i = 0; *text_postfix[i]; i++) { - int l = strlen(text_postfix[i]); - if (l <= len) - if (!hpfs_compare_names(inode->i_sb, text_postfix[i], l, name + len - l, l, 0)) - goto text; - } - for (i = 0; *text_prefix[i]; i++) { - int l = strlen(text_prefix[i]); - if (l <= len) - if (!hpfs_compare_names(inode->i_sb, text_prefix[i], l, name, l, 0)) - goto text; - } - hpfs_inode->i_conv = CONV_BINARY; - return; - text: - hpfs_inode->i_conv = CONV_TEXT; - return; -} - static inline int not_allowed_char(unsigned char c) { return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' || diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index d5f8c8a19023..1f05839c27a7 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -29,7 +29,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); if (!fnode) goto bail; - dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0, 1); + dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0); if (!dnode) goto bail1; memset(&dee, 0, sizeof dee); @@ -37,8 +37,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!(mode & 0222)) dee.read_only = 1; /*dee.archive = 0;*/ dee.hidden = name[0] == '.'; - dee.fnode = fno; - dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); + dee.fnode = cpu_to_le32(fno); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); result = new_inode(dir->i_sb); if (!result) goto bail2; @@ -46,7 +46,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) result->i_ino = fno; hpfs_i(result)->i_parent_dir = dir->i_ino; hpfs_i(result)->i_dno = dno; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); + result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); result->i_ctime.tv_nsec = 0; result->i_mtime.tv_nsec = 0; result->i_atime.tv_nsec = 0; @@ -60,8 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (dee.read_only) result->i_mode &= ~0222; - mutex_lock(&hpfs_i(dir)->i_mutex); - r = hpfs_add_dirent(dir, name, len, &dee, 0); + r = hpfs_add_dirent(dir, name, len, &dee); if (r == 1) goto bail3; if (r == -1) { @@ -70,21 +69,21 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) } fnode->len = len; memcpy(fnode->name, name, len > 15 ? 15 : len); - fnode->up = dir->i_ino; + fnode->up = cpu_to_le32(dir->i_ino); fnode->dirflag = 1; fnode->btree.n_free_nodes = 7; fnode->btree.n_used_nodes = 1; - fnode->btree.first_free = 0x14; - fnode->u.external[0].disk_secno = dno; - fnode->u.external[0].file_secno = -1; + fnode->btree.first_free = cpu_to_le16(0x14); + fnode->u.external[0].disk_secno = cpu_to_le32(dno); + fnode->u.external[0].file_secno = cpu_to_le32(-1); dnode->root_dnode = 1; - dnode->up = fno; + dnode->up = cpu_to_le32(fno); de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0); - de->creation_date = de->write_date = de->read_date = gmt_to_local(dir->i_sb, get_seconds()); + de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); if (!(mode & 0222)) de->read_only = 1; de->first = de->directory = 1; /*de->hidden = de->system = 0;*/ - de->fnode = fno; + de->fnode = cpu_to_le32(fno); mark_buffer_dirty(bh); brelse(bh); hpfs_mark_4buffers_dirty(&qbh0); @@ -101,11 +100,9 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) hpfs_write_inode_nolock(result); } d_instantiate(dentry, result); - mutex_unlock(&hpfs_i(dir)->i_mutex); hpfs_unlock(dir->i_sb); return 0; bail3: - mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail2: hpfs_brelse4(&qbh0); @@ -140,8 +137,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc if (!(mode & 0222)) dee.read_only = 1; dee.archive = 1; dee.hidden = name[0] == '.'; - dee.fnode = fno; - dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); + dee.fnode = cpu_to_le32(fno); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); result = new_inode(dir->i_sb); if (!result) @@ -154,9 +151,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc result->i_op = &hpfs_file_iops; result->i_fop = &hpfs_file_ops; result->i_nlink = 1; - hpfs_decide_conv(result, name, len); hpfs_i(result)->i_parent_dir = dir->i_ino; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); + result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); result->i_ctime.tv_nsec = 0; result->i_mtime.tv_nsec = 0; result->i_atime.tv_nsec = 0; @@ -168,8 +164,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc result->i_data.a_ops = &hpfs_aops; hpfs_i(result)->mmu_private = 0; - mutex_lock(&hpfs_i(dir)->i_mutex); - r = hpfs_add_dirent(dir, name, len, &dee, 0); + r = hpfs_add_dirent(dir, name, len, &dee); if (r == 1) goto bail2; if (r == -1) { @@ -178,7 +173,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc } fnode->len = len; memcpy(fnode->name, name, len > 15 ? 15 : len); - fnode->up = dir->i_ino; + fnode->up = cpu_to_le32(dir->i_ino); mark_buffer_dirty(bh); brelse(bh); @@ -193,12 +188,10 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc hpfs_write_inode_nolock(result); } d_instantiate(dentry, result); - mutex_unlock(&hpfs_i(dir)->i_mutex); hpfs_unlock(dir->i_sb); return 0; bail2: - mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail1: brelse(bh); @@ -232,8 +225,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t if (!(mode & 0222)) dee.read_only = 1; dee.archive = 1; dee.hidden = name[0] == '.'; - dee.fnode = fno; - dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); + dee.fnode = cpu_to_le32(fno); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); result = new_inode(dir->i_sb); if (!result) @@ -242,7 +235,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t hpfs_init_inode(result); result->i_ino = fno; hpfs_i(result)->i_parent_dir = dir->i_ino; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); + result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); result->i_ctime.tv_nsec = 0; result->i_mtime.tv_nsec = 0; result->i_atime.tv_nsec = 0; @@ -254,8 +247,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t result->i_blocks = 1; init_special_inode(result, mode, rdev); - mutex_lock(&hpfs_i(dir)->i_mutex); - r = hpfs_add_dirent(dir, name, len, &dee, 0); + r = hpfs_add_dirent(dir, name, len, &dee); if (r == 1) goto bail2; if (r == -1) { @@ -264,19 +256,17 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t } fnode->len = len; memcpy(fnode->name, name, len > 15 ? 15 : len); - fnode->up = dir->i_ino; + fnode->up = cpu_to_le32(dir->i_ino); mark_buffer_dirty(bh); insert_inode_hash(result); hpfs_write_inode_nolock(result); d_instantiate(dentry, result); - mutex_unlock(&hpfs_i(dir)->i_mutex); brelse(bh); hpfs_unlock(dir->i_sb); return 0; bail2: - mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail1: brelse(bh); @@ -310,8 +300,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy memset(&dee, 0, sizeof dee); dee.archive = 1; dee.hidden = name[0] == '.'; - dee.fnode = fno; - dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); + dee.fnode = cpu_to_le32(fno); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); result = new_inode(dir->i_sb); if (!result) @@ -319,7 +309,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy result->i_ino = fno; hpfs_init_inode(result); hpfs_i(result)->i_parent_dir = dir->i_ino; - result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); + result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); result->i_ctime.tv_nsec = 0; result->i_mtime.tv_nsec = 0; result->i_atime.tv_nsec = 0; @@ -333,8 +323,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy result->i_op = &page_symlink_inode_operations; result->i_data.a_ops = &hpfs_symlink_aops; - mutex_lock(&hpfs_i(dir)->i_mutex); - r = hpfs_add_dirent(dir, name, len, &dee, 0); + r = hpfs_add_dirent(dir, name, len, &dee); if (r == 1) goto bail2; if (r == -1) { @@ -343,7 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy } fnode->len = len; memcpy(fnode->name, name, len > 15 ? 15 : len); - fnode->up = dir->i_ino; + fnode->up = cpu_to_le32(dir->i_ino); hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink)); mark_buffer_dirty(bh); brelse(bh); @@ -352,11 +341,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy hpfs_write_inode_nolock(result); d_instantiate(dentry, result); - mutex_unlock(&hpfs_i(dir)->i_mutex); hpfs_unlock(dir->i_sb); return 0; bail2: - mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail1: brelse(bh); @@ -374,7 +361,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) struct hpfs_dirent *de; struct inode *inode = dentry->d_inode; dnode_secno dno; - fnode_secno fno; int r; int rep = 0; int err; @@ -382,8 +368,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) hpfs_lock(dir->i_sb); hpfs_adjust_length(name, &len); again: - mutex_lock(&hpfs_i(inode)->i_parent_mutex); - mutex_lock(&hpfs_i(dir)->i_mutex); err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); if (!de) @@ -397,7 +381,6 @@ again: if (de->directory) goto out1; - fno = de->fnode; r = hpfs_remove_dirent(dir, dno, de, &qbh, 1); switch (r) { case 1: @@ -410,8 +393,6 @@ again: if (rep++) break; - mutex_unlock(&hpfs_i(dir)->i_mutex); - mutex_unlock(&hpfs_i(inode)->i_parent_mutex); dentry_unhash(dentry); if (!d_unhashed(dentry)) { dput(dentry); @@ -445,8 +426,6 @@ again: out1: hpfs_brelse4(&qbh); out: - mutex_unlock(&hpfs_i(dir)->i_mutex); - mutex_unlock(&hpfs_i(inode)->i_parent_mutex); hpfs_unlock(dir->i_sb); return err; } @@ -459,15 +438,12 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) struct hpfs_dirent *de; struct inode *inode = dentry->d_inode; dnode_secno dno; - fnode_secno fno; int n_items = 0; int err; int r; hpfs_adjust_length(name, &len); hpfs_lock(dir->i_sb); - mutex_lock(&hpfs_i(inode)->i_parent_mutex); - mutex_lock(&hpfs_i(dir)->i_mutex); err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); if (!de) @@ -486,7 +462,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) if (n_items) goto out1; - fno = de->fnode; r = hpfs_remove_dirent(dir, dno, de, &qbh, 1); switch (r) { case 1: @@ -505,8 +480,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) out1: hpfs_brelse4(&qbh); out: - mutex_unlock(&hpfs_i(dir)->i_mutex); - mutex_unlock(&hpfs_i(inode)->i_parent_mutex); hpfs_unlock(dir->i_sb); return err; } @@ -568,12 +541,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, hpfs_lock(i->i_sb); /* order doesn't matter, due to VFS exclusion */ - mutex_lock(&hpfs_i(i)->i_parent_mutex); - if (new_inode) - mutex_lock(&hpfs_i(new_inode)->i_parent_mutex); - mutex_lock(&hpfs_i(old_dir)->i_mutex); - if (new_dir != old_dir) - mutex_lock(&hpfs_i(new_dir)->i_mutex); /* Erm? Moving over the empty non-busy directory is perfectly legal */ if (new_inode && S_ISDIR(new_inode->i_mode)) { @@ -610,9 +577,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_dir == old_dir) hpfs_brelse4(&qbh); - hpfs_lock_creation(i->i_sb); - if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de, 1))) { - hpfs_unlock_creation(i->i_sb); + if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de))) { if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!"); err = r == 1 ? -ENOSPC : -EFSERROR; if (new_dir != old_dir) hpfs_brelse4(&qbh); @@ -621,20 +586,17 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_dir == old_dir) if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) { - hpfs_unlock_creation(i->i_sb); hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2"); err = -ENOENT; goto end1; } if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) { - hpfs_unlock_creation(i->i_sb); hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent"); err = r == 2 ? -ENOSPC : -EFSERROR; goto end1; } - hpfs_unlock_creation(i->i_sb); - + end: hpfs_i(i)->i_parent_dir = new_dir->i_ino; if (S_ISDIR(i->i_mode)) { @@ -642,22 +604,14 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(old_dir); } if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) { - fnode->up = new_dir->i_ino; + fnode->up = cpu_to_le32(new_dir->i_ino); fnode->len = new_len; memcpy(fnode->name, new_name, new_len>15?15:new_len); if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len); mark_buffer_dirty(bh); brelse(bh); } - hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv; - hpfs_decide_conv(i, new_name, new_len); end1: - if (old_dir != new_dir) - mutex_unlock(&hpfs_i(new_dir)->i_mutex); - mutex_unlock(&hpfs_i(old_dir)->i_mutex); - mutex_unlock(&hpfs_i(i)->i_parent_mutex); - if (new_inode) - mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex); hpfs_unlock(i->i_sb); return err; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index c89b40808587..98580a3b5005 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -18,15 +18,16 @@ /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ -static void mark_dirty(struct super_block *s) +static void mark_dirty(struct super_block *s, int remount) { - if (hpfs_sb(s)->sb_chkdsk && !(s->s_flags & MS_RDONLY)) { + if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) { struct buffer_head *bh; struct hpfs_spare_block *sb; if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { sb->dirty = 1; sb->old_wrote = 0; mark_buffer_dirty(bh); + sync_dirty_buffer(bh); brelse(bh); } } @@ -40,10 +41,12 @@ static void unmark_dirty(struct super_block *s) struct buffer_head *bh; struct hpfs_spare_block *sb; if (s->s_flags & MS_RDONLY) return; + sync_blockdev(s->s_bdev); if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error; sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error; mark_buffer_dirty(bh); + sync_dirty_buffer(bh); brelse(bh); } } @@ -63,13 +66,13 @@ void hpfs_error(struct super_block *s, const char *fmt, ...) if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { printk("; crashing the system because you wanted it\n"); - mark_dirty(s); + mark_dirty(s, 0); panic("HPFS panic"); } else if (hpfs_sb(s)->sb_err == 1) { if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n"); else { printk("; remounting read-only\n"); - mark_dirty(s); + mark_dirty(s, 0); s->s_flags |= MS_RDONLY; } } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n"); @@ -102,9 +105,12 @@ static void hpfs_put_super(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); + hpfs_lock(s); + unmark_dirty(s); + hpfs_unlock(s); + kfree(sbi->sb_cp_table); kfree(sbi->sb_bmp_dir); - unmark_dirty(s); s->s_fs_info = NULL; kfree(sbi); } @@ -129,7 +135,7 @@ static unsigned count_bitmaps(struct super_block *s) n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; count = 0; for (n = 0; n < n_bands; n++) - count += hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_bmp_dir[n]); + count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); return count; } @@ -188,8 +194,6 @@ static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; - mutex_init(&ei->i_mutex); - mutex_init(&ei->i_parent_mutex); inode_init_once(&ei->vfs_inode); } @@ -218,7 +222,6 @@ static void destroy_inodecache(void) enum { Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis, - Opt_conv_binary, Opt_conv_text, Opt_conv_auto, Opt_check_none, Opt_check_normal, Opt_check_strict, Opt_err_cont, Opt_err_ro, Opt_err_panic, Opt_eas_no, Opt_eas_ro, Opt_eas_rw, @@ -233,9 +236,6 @@ static const match_table_t tokens = { {Opt_umask, "umask=%o"}, {Opt_case_lower, "case=lower"}, {Opt_case_asis, "case=asis"}, - {Opt_conv_binary, "conv=binary"}, - {Opt_conv_text, "conv=text"}, - {Opt_conv_auto, "conv=auto"}, {Opt_check_none, "check=none"}, {Opt_check_normal, "check=normal"}, {Opt_check_strict, "check=strict"}, @@ -253,7 +253,7 @@ static const match_table_t tokens = { }; static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, - int *lowercase, int *conv, int *eas, int *chk, int *errs, + int *lowercase, int *eas, int *chk, int *errs, int *chkdsk, int *timeshift) { char *p; @@ -295,15 +295,6 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, case Opt_case_asis: *lowercase = 0; break; - case Opt_conv_binary: - *conv = CONV_BINARY; - break; - case Opt_conv_text: - *conv = CONV_TEXT; - break; - case Opt_conv_auto: - *conv = CONV_AUTO; - break; case Opt_check_none: *chk = 0; break; @@ -370,9 +361,6 @@ HPFS filesystem options:\n\ umask=xxx set mode of files that don't have mode specified in eas\n\ case=lower lowercase all files\n\ case=asis do not lowercase files (default)\n\ - conv=binary do not convert CR/LF -> LF (default)\n\ - conv=auto convert only files with known text extensions\n\ - conv=text convert all files\n\ check=none no fs checks - kernel may crash on corrupted filesystem\n\ check=normal do some checks - it should not crash (default)\n\ check=strict do extra time-consuming checks, used for debugging\n\ @@ -394,7 +382,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) uid_t uid; gid_t gid; umode_t umask; - int lowercase, conv, eas, chk, errs, chkdsk, timeshift; + int lowercase, eas, chk, errs, chkdsk, timeshift; int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); @@ -405,11 +393,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) lock_super(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; - lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; + lowercase = sbi->sb_lowercase; eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk; errs = sbi->sb_err; timeshift = sbi->sb_timeshift; - if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv, + if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &eas, &chk, &errs, &chkdsk, ×hift))) { printk("HPFS: bad mount options.\n"); goto out_err; @@ -427,11 +415,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) sbi->sb_uid = uid; sbi->sb_gid = gid; sbi->sb_mode = 0777 & ~umask; - sbi->sb_lowercase = lowercase; sbi->sb_conv = conv; + sbi->sb_lowercase = lowercase; sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk; sbi->sb_err = errs; sbi->sb_timeshift = timeshift; - if (!(*flags & MS_RDONLY)) mark_dirty(s); + if (!(*flags & MS_RDONLY)) mark_dirty(s, 1); replace_mount_options(s, new_opts); @@ -471,7 +459,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) uid_t uid; gid_t gid; umode_t umask; - int lowercase, conv, eas, chk, errs, chkdsk, timeshift; + int lowercase, eas, chk, errs, chkdsk, timeshift; dnode_secno root_dno; struct hpfs_dirent *de = NULL; @@ -479,11 +467,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) int o; - if (num_possible_cpus() > 1) { - printk(KERN_ERR "HPFS is not SMP safe\n"); - return -EINVAL; - } - save_mount_options(s, options); sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); @@ -495,20 +478,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_bmp_dir = NULL; sbi->sb_cp_table = NULL; - mutex_init(&sbi->hpfs_creation_de); + mutex_init(&sbi->hpfs_mutex); + hpfs_lock(s); uid = current_uid(); gid = current_gid(); umask = current_umask(); lowercase = 0; - conv = CONV_BINARY; eas = 2; chk = 1; errs = 1; chkdsk = 1; timeshift = 0; - if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &conv, + if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &eas, &chk, &errs, &chkdsk, ×hift))) { printk("HPFS: bad mount options.\n"); goto bail0; @@ -526,9 +509,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3; /* Check magics */ - if (/*bootblock->magic != BB_MAGIC - ||*/ superblock->magic != SB_MAGIC - || spareblock->magic != SP_MAGIC) { + if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC + ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC + || le32_to_cpu(spareblock->magic) != SP_MAGIC) { if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n"); goto bail4; } @@ -549,19 +532,18 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) s->s_op = &hpfs_sops; s->s_d_op = &hpfs_dentry_operations; - sbi->sb_root = superblock->root; - sbi->sb_fs_size = superblock->n_sectors; - sbi->sb_bitmaps = superblock->bitmaps; - sbi->sb_dirband_start = superblock->dir_band_start; - sbi->sb_dirband_size = superblock->n_dir_band; - sbi->sb_dmap = superblock->dir_band_bitmap; + sbi->sb_root = le32_to_cpu(superblock->root); + sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors); + sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps); + sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start); + sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band); + sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap); sbi->sb_uid = uid; sbi->sb_gid = gid; sbi->sb_mode = 0777 & ~umask; sbi->sb_n_free = -1; sbi->sb_n_free_dnodes = -1; sbi->sb_lowercase = lowercase; - sbi->sb_conv = conv; sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk; @@ -573,7 +555,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_max_fwd_alloc = 0xffffff; /* Load bitmap directory */ - if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, superblock->bitmaps))) + if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; /* Check for general fs errors*/ @@ -591,20 +573,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) mark_buffer_dirty(bh2); } - if (spareblock->hotfixes_used || spareblock->n_spares_used) { + if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) { if (errs >= 2) { printk("HPFS: Hotfixes not supported here, try chkdsk\n"); - mark_dirty(s); + mark_dirty(s, 0); goto bail4; } hpfs_error(s, "hotfixes not supported here, try chkdsk"); if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n"); else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n"); } - if (spareblock->n_dnode_spares != spareblock->n_dnode_spares_free) { + if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) { if (errs >= 2) { printk("HPFS: Spare dnodes used, try chkdsk\n"); - mark_dirty(s); + mark_dirty(s, 0); goto bail4; } hpfs_error(s, "warning: spare dnodes used, try chkdsk"); @@ -612,26 +594,26 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) } if (chk) { unsigned a; - if (superblock->dir_band_end - superblock->dir_band_start + 1 != superblock->n_dir_band || - superblock->dir_band_end < superblock->dir_band_start || superblock->n_dir_band > 0x4000) { + if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) || + le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) { hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x", - superblock->dir_band_start, superblock->dir_band_end, superblock->n_dir_band); + le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band)); goto bail4; } a = sbi->sb_dirband_size; sbi->sb_dirband_size = 0; - if (hpfs_chk_sectors(s, superblock->dir_band_start, superblock->n_dir_band, "dir_band") || - hpfs_chk_sectors(s, superblock->dir_band_bitmap, 4, "dir_band_bitmap") || - hpfs_chk_sectors(s, superblock->bitmaps, 4, "bitmaps")) { - mark_dirty(s); + if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") || + hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") || + hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) { + mark_dirty(s, 0); goto bail4; } sbi->sb_dirband_size = a; } else printk("HPFS: You really don't want any checks? You are crazy...\n"); /* Load code page table */ - if (spareblock->n_code_pages) - if (!(sbi->sb_cp_table = hpfs_load_code_page(s, spareblock->code_page_dir))) + if (le32_to_cpu(spareblock->n_code_pages)) + if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir)))) printk("HPFS: Warning: code page support is disabled\n"); brelse(bh2); @@ -660,13 +642,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) if (!de) hpfs_error(s, "unable to find root dir"); else { - root->i_atime.tv_sec = local_to_gmt(s, de->read_date); + root->i_atime.tv_sec = local_to_gmt(s, le32_to_cpu(de->read_date)); root->i_atime.tv_nsec = 0; - root->i_mtime.tv_sec = local_to_gmt(s, de->write_date); + root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date)); root->i_mtime.tv_nsec = 0; - root->i_ctime.tv_sec = local_to_gmt(s, de->creation_date); + root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); root->i_ctime.tv_nsec = 0; - hpfs_i(root)->i_ea_size = de->ea_size; + hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size); hpfs_i(root)->i_parent_dir = root->i_ino; if (root->i_size == -1) root->i_size = 2048; @@ -674,6 +656,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_blocks = 5; hpfs_brelse4(&qbh); } + hpfs_unlock(s); return 0; bail4: brelse(bh2); @@ -681,6 +664,7 @@ bail3: brelse(bh1); bail2: brelse(bh0); bail1: bail0: + hpfs_unlock(s); kfree(sbi->sb_bmp_dir); kfree(sbi->sb_cp_table); s->s_fs_info = NULL; diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 0a0a66d98cce..f7684483785e 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned long group, group_offset; int i, j, n, ret; - for (i = 0; i < nitems; i += n) { + for (i = 0; i < nitems; i = j) { group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index ac0ccb5026a2..19d6750d1d6c 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -348,6 +348,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, goto fail; } + /* Check that sizeof_partition_entry has the correct value */ + if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) { + pr_debug("GUID Partitition Entry Size check failed.\n"); + goto fail; + } + if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) goto fail; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2e7addfd9803..318d8654989b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -214,7 +214,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) int flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; - unsigned long start; + unsigned long start, end; dev_t dev = 0; int len; @@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) - if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) - start += PAGE_SIZE; + if (stack_guard_page_start(vma, start)) + start += PAGE_SIZE; + end = vma->vm_end; + if (stack_guard_page_end(vma, end)) + end -= PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, - vma->vm_end, + end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index e4f9c1b0836c..3e898a48122d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -926,6 +926,7 @@ restart: XFS_LOOKUP_BATCH, XFS_ICI_RECLAIM_TAG); if (!nr_found) { + done = 1; rcu_read_unlock(); break; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index acdb92f14d51..5fc2380092c8 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -346,20 +346,23 @@ xfs_ail_delete( */ STATIC void xfs_ail_worker( - struct work_struct *work) + struct work_struct *work) { - struct xfs_ail *ailp = container_of(to_delayed_work(work), + struct xfs_ail *ailp = container_of(to_delayed_work(work), struct xfs_ail, xa_work); - long tout; - xfs_lsn_t target = ailp->xa_target; - xfs_lsn_t lsn; - xfs_log_item_t *lip; - int flush_log, count, stuck; - xfs_mount_t *mp = ailp->xa_mount; + xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; - int push_xfsbufd = 0; + xfs_log_item_t *lip; + xfs_lsn_t lsn; + xfs_lsn_t target; + long tout = 10; + int flush_log = 0; + int stuck = 0; + int count = 0; + int push_xfsbufd = 0; spin_lock(&ailp->xa_lock); + target = ailp->xa_target; xfs_trans_ail_cursor_init(ailp, cur); lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { @@ -368,8 +371,7 @@ xfs_ail_worker( */ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - ailp->xa_last_pushed_lsn = 0; - return; + goto out_done; } XFS_STATS_INC(xs_push_ail); @@ -386,8 +388,7 @@ xfs_ail_worker( * lots of contention on the AIL lists. */ lsn = lip->li_lsn; - flush_log = stuck = count = 0; - while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { + while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { int lock_result; /* * If we can lock the item without sleeping, unlock the AIL @@ -480,21 +481,25 @@ xfs_ail_worker( } /* assume we have more work to do in a short while */ - tout = 10; +out_done: if (!count) { /* We're past our target or empty, so idle */ ailp->xa_last_pushed_lsn = 0; /* - * Check for an updated push target before clearing the - * XFS_AIL_PUSHING_BIT. If the target changed, we've got more - * work to do. Wait a bit longer before starting that work. + * We clear the XFS_AIL_PUSHING_BIT first before checking + * whether the target has changed. If the target has changed, + * this pushes the requeue race directly onto the result of the + * atomic test/set bit, so we are guaranteed that either the + * the pusher that changed the target or ourselves will requeue + * the work (but not both). */ + clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); smp_rmb(); - if (ailp->xa_target == target) { - clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); + if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || + test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) return; - } + tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* @@ -553,7 +558,7 @@ xfs_ail_push( * the XFS_AIL_PUSHING_BIT. */ smp_wmb(); - ailp->xa_target = threshold_lsn; + xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); } diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index c2f93a8ae2e1..564b14aa7e16 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -86,7 +86,7 @@ static inline bool drm_mm_initialized(struct drm_mm *mm) } #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \ &(mm)->head_node.node_list, \ - node_list); + node_list) #define drm_mm_for_each_scanned_node_reverse(entry, n, mm) \ for (entry = (mm)->prev_scanned_node, \ next = entry ? list_entry(entry->node_list.next, \ diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 70e4efabe0fb..ebeb2f3ad068 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,7 +61,7 @@ struct flex_array { struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 22b32af1b5ec..b5a550a39a70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -37,6 +37,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int padding; }; #define FTRACE_MAX_EVENT \ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ea2dc1a2e13d..55ef181521ff 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_GET_PVINFO 57 #define KVM_CAP_PPC_IRQ_LEVEL 58 #define KVM_CAP_ASYNC_PF 59 +#define KVM_CAP_TSC_CONTROL 60 +#define KVM_CAP_GET_TSC_KHZ 61 +#define KVM_CAP_PPC_BOOKE_SREGS 62 #ifdef KVM_CAP_IRQ_ROUTING @@ -677,6 +680,9 @@ struct kvm_clock_data { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) +/* Available with KVM_CAP_TSC_CONTROL */ +#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) +#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..0bc3d372e3cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,10 @@ #include <asm/kvm_host.h> +#ifndef KVM_MMIO_SIZE +#define KVM_MMIO_SIZE 8 +#endif + /* * vcpu->requests bit members */ @@ -43,7 +47,6 @@ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 -#define KVM_REQ_NMI 13 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 @@ -133,7 +136,8 @@ struct kvm_vcpu { int mmio_read_completed; int mmio_is_write; int mmio_size; - unsigned char mmio_data[8]; + int mmio_index; + unsigned char mmio_data[KVM_MMIO_SIZE]; gpa_t mmio_phys_addr; #endif @@ -292,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) } #define kvm_for_each_vcpu(idx, vcpup, kvm) \ - for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ - idx < atomic_read(&kvm->online_vcpus) && vcpup; \ - vcpup = kvm_get_vcpu(kvm, ++idx)) + for (idx = 0; \ + idx < atomic_read(&kvm->online_vcpus) && \ + (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ + idx++) int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); @@ -365,7 +370,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int memslot_id(struct kvm *kvm, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); @@ -597,6 +601,11 @@ static inline void kvm_guest_exit(void) current->flags &= ~PF_VCPU; } +static inline int memslot_id(struct kvm *kvm, gfn_t gfn) +{ + return gfn_to_memslot(kvm, gfn)->id; +} + static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 2348db26bc3d..6507dde38b16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,33 @@ int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); /* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) { return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); } +static inline int stack_guard_page_start(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_growsdown(vma->vm_prev, addr); +} + +/* Is the vma a continuation of the stack vma below it? */ +static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); +} + +static inline int stack_guard_page_end(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSUP) && + (vma->vm_end == addr) && + !vma_growsup(vma->vm_next, addr); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3a5c4449fd36..8b97308e65df 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -948,7 +948,7 @@ do { \ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a1147e5dd245..9178d5cc0b01 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -189,6 +189,10 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = current->ptrace; __ptrace_link(child, current->parent); } + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_set(&child->ptrace_bp_refcnt, 1); +#endif } /** @@ -350,6 +354,13 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -#endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int ptrace_get_breakpoints(struct task_struct *tsk); +extern void ptrace_put_breakpoints(struct task_struct *tsk); +#else +static inline void ptrace_put_breakpoints(struct task_struct *tsk) { } +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +#endif /* __KERNEL */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 18d63cea2848..781abd137673 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1537,6 +1537,9 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d516f00c8e0f..86aefed6140b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -791,6 +791,7 @@ struct ip_vs_app { /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ + int enable; /* enable like nf_hooks do */ /* * Hash table: for real service lookups */ @@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) atomic_inc(&ctl_cp->n_control); } +/* + * IPVS netns init & cleanup functions + */ +extern int __ip_vs_estimator_init(struct net *net); +extern int __ip_vs_control_init(struct net *net); +extern int __ip_vs_protocol_init(struct net *net); +extern int __ip_vs_app_init(struct net *net); +extern int __ip_vs_conn_init(struct net *net); +extern int __ip_vs_sync_init(struct net *net); +extern void __ip_vs_conn_cleanup(struct net *net); +extern void __ip_vs_app_cleanup(struct net *net); +extern void __ip_vs_protocol_cleanup(struct net *net); +extern void __ip_vs_control_cleanup(struct net *net); +extern void __ip_vs_estimator_cleanup(struct net *net); +extern void __ip_vs_sync_cleanup(struct net *net); +extern void __ip_vs_service_cleanup(struct net *net); /* * IPVS application functions diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6ae4bc5ce8a7..20afeaa39395 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -324,6 +324,7 @@ struct xfrm_state_afinfo { int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); int (*output)(struct sk_buff *skb); + int (*output_finish)(struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, @@ -1454,6 +1455,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) extern int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); +extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm6_extract_header(struct sk_buff *skb); @@ -1470,6 +1472,7 @@ extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_output(struct sk_buff *skb); +extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); diff --git a/init/Kconfig b/init/Kconfig index 2d964fa40f5b..119b9727d10b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1226,7 +1226,6 @@ config SLAB per cpu and per node queues. config SLUB - depends on BROKEN || NUMA || !DISCONTIGMEM bool "SLUB (Unqueued Allocator)" help SLUB is a slab allocator that minimizes cache line usage diff --git a/kernel/exit.c b/kernel/exit.c index f5d2f63bae0b..8dd874181542 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1016,7 +1016,7 @@ NORET_TYPE void do_exit(long code) /* * FIXME: do that only when needed, using sched_exit tracepoint */ - flush_ptrace_hw_breakpoint(tsk); + ptrace_put_breakpoints(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0fc1eed28d27..dc7ab65f3b36 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -22,6 +22,7 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/regset.h> +#include <linux/hw_breakpoint.h> /* @@ -879,3 +880,19 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, return ret; } #endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +int ptrace_get_breakpoints(struct task_struct *tsk) +{ + if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) + return 0; + + return -1; +} + +void ptrace_put_breakpoints(struct task_struct *tsk) +{ + if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) + flush_ptrace_hw_breakpoint(tsk); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d38c16a06a6f..1cb49be7c7fb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1110,6 +1110,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->padding = 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e88f74fe1d4c..2fe110341359 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -116,6 +116,7 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(int, padding); return ret; } diff --git a/lib/flex_array.c b/lib/flex_array.c index c0ea40ba2082..854b57bd7d9d 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -232,10 +232,10 @@ EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -245,14 +245,24 @@ EXPORT_SYMBOL(flex_array_clear); * Locking must be provided by the caller. */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; - if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) + if (!start && !nr_elements) + return 0; + if (start >= fa->total_nr_elements) + return -ENOSPC; + if (!nr_elements) + return 0; + + end = start + nr_elements - 1; + + if (end >= fa->total_nr_elements) return -ENOSPC; if (elements_fit_in_base(fa)) return 0; @@ -343,6 +353,8 @@ int flex_array_shrink(struct flex_array *fa) int part_nr; int ret = 0; + if (!fa->total_nr_elements) + return 0; if (elements_fit_in_base(fa)) return ret; for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { diff --git a/mm/memory.c b/mm/memory.c index 607098d47e74..61e66f026563 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1359,7 +1359,7 @@ split_fallthrough: */ mark_page_accessed(page); } - if (flags & FOLL_MLOCK) { + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * The preliminary mapping check is mainly to avoid the * pointless overhead of lock_page on the ZERO_PAGE @@ -1412,9 +1412,8 @@ no_page_table: static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_stack_continue(vma->vm_prev, addr); + return stack_guard_page_start(vma, addr) || + stack_guard_page_end(vma, addr+PAGE_SIZE); } /** @@ -1551,13 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, continue; } - /* - * If we don't actually want the page itself, - * and it's the stack guard page, just skip it. - */ - if (!pages && stack_guard_page(vma, start)) - goto next_page; - do { struct page *page; unsigned int foll_flags = gup_flags; @@ -1574,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int ret; unsigned int fault_flags = 0; + /* For mlock, just skip the stack guard page. */ + if (foll_flags & FOLL_MLOCK) { + if (stack_guard_page(vma, start)) + goto next_page; + } if (foll_flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (nonblocking) diff --git a/mm/mlock.c b/mm/mlock.c index 6b55e3efe0df..516b2c2ddd5a 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -162,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, VM_BUG_ON(end > vma->vm_end); VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - gup_flags = FOLL_TOUCH; + gup_flags = FOLL_TOUCH | FOLL_MLOCK; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW @@ -178,9 +178,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) gup_flags |= FOLL_FORCE; - if (vma->vm_flags & VM_LOCKED) - gup_flags |= FOLL_MLOCK; - return __get_user_pages(current, mm, addr, nr_pages, gup_flags, NULL, NULL, nonblocking); } diff --git a/mm/mmap.c b/mm/mmap.c index e27e0cf0de03..772140c53ab1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1767,10 +1767,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) size = address - vma->vm_start; grow = (address - vma->vm_end) >> PAGE_SHIFT; - error = acct_stack_growth(vma, size, grow); - if (!error) { - vma->vm_end = address; - perf_event_mmap(vma); + error = -ENOMEM; + if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) { + error = acct_stack_growth(vma, size, grow); + if (!error) { + vma->vm_end = address; + perf_event_mmap(vma); + } } } vma_unlock_anon_vma(vma); diff --git a/mm/slub.c b/mm/slub.c index 94d2a33a866e..9d2e5e46bf09 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1940,7 +1940,7 @@ redo: * Since this is without lock semantics the protection is only against * code executing on this cpu *not* from access by other cpus. */ - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, object, tid, get_freepointer(s, object), next_tid(tid)))) { @@ -2145,7 +2145,7 @@ redo: set_freepointer(s, object, c->freelist); #ifdef CONFIG_CMPXCHG_LOCAL - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, object, next_tid(tid)))) { diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 7850412f52b7..0eb1a886b370 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -124,6 +124,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlans--; + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_leave(dev); + vlan_group_set_device(grp, vlan_id, NULL); if (!grp->killall) synchronize_net(); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e34ea9e5e28b..b2ff6c8d3603 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -487,9 +487,6 @@ static int vlan_dev_stop(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (vlan->flags & VLAN_FLAG_GVRP) - vlan_gvrp_request_leave(dev); - dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) diff --git a/net/9p/client.c b/net/9p/client.c index 77367745be9b..a9aa2dd66482 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -614,7 +614,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) err = c->trans_mod->request(c, req); if (err < 0) { - if (err != -ERESTARTSYS) + if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto reterr; } diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index e883172f9aa2..9a70ebdec56e 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -63,7 +63,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, int nr_pages, u8 rw) { uint32_t first_page_bytes = 0; - uint32_t pdata_mapped_pages; + int32_t pdata_mapped_pages; struct trans_rpage_info *rpinfo; *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); @@ -75,14 +75,9 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, rpinfo = req->tc->private; pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, nr_pages, rw, &rpinfo->rp_data[0]); + if (pdata_mapped_pages <= 0) + return pdata_mapped_pages; - if (pdata_mapped_pages < 0) { - printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" - "nr_pages:%d\n", pdata_mapped_pages, - req->tc->pubuf, nr_pages); - pdata_mapped_pages = 0; - return -EIO; - } rpinfo->rp_nr_pages = pdata_mapped_pages; if (*pdata_off) { *pdata_len = first_page_bytes; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 893669caa8de..1a92b369c820 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1766,7 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info, newinfo->entries_size = size; - xt_compat_init_offsets(AF_INET, info->nentries); + xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } @@ -1882,7 +1882,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, struct xt_match *match; struct xt_target *wt; void *dst = NULL; - int off, pad = 0, ret = 0; + int off, pad = 0; unsigned int size_kern, entry_offset, match_size = mwt->match_size; strlcpy(name, mwt->u.name, sizeof(name)); @@ -1935,13 +1935,6 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, break; } - if (!dst) { - ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, - off + ebt_compat_entry_padsize()); - if (ret < 0) - return ret; - } - state->buf_kern_offset += match_size + off; state->buf_user_offset += match_size; pad = XT_ALIGN(size_kern) - size_kern; @@ -2016,50 +2009,6 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, return growth; } -#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct compat_ebt_entry_mwt *__watcher; \ - \ - for (__i = e->watchers_offset; \ - __i < (e)->target_offset; \ - __i += __watcher->watcher_size + \ - sizeof(struct compat_ebt_entry_mwt)) { \ - __watcher = (void *)(e) + __i; \ - __ret = fn(__watcher , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->target_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - -#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct compat_ebt_entry_mwt *__match; \ - \ - for (__i = sizeof(struct ebt_entry); \ - __i < (e)->watchers_offset; \ - __i += __match->match_size + \ - sizeof(struct compat_ebt_entry_mwt)) { \ - __match = (void *)(e) + __i; \ - __ret = fn(__match , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->watchers_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - /* called for all ebt_entry structures. */ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, unsigned int *total, @@ -2132,6 +2081,14 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, } } + if (state->buf_kern_start == NULL) { + unsigned int offset = buf_start - (char *) base; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset); + if (ret < 0) + return ret; + } + startoff = state->buf_user_offset - startoff; BUG_ON(*total < startoff); @@ -2240,6 +2197,7 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); + xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 05f357828a2f..e15a82ccc05f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2267,6 +2267,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->more_to_follow = false; m->pool = NULL; + /* middle */ + m->middle = NULL; + + /* data */ + m->nr_pages = 0; + m->page_alignment = 0; + m->pages = NULL; + m->pagelist = NULL; + m->bio = NULL; + m->bio_iter = NULL; + m->bio_seg = 0; + m->trail = NULL; + /* front */ if (front_len) { if (front_len > PAGE_CACHE_SIZE) { @@ -2286,19 +2299,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) } m->front.iov_len = front_len; - /* middle */ - m->middle = NULL; - - /* data */ - m->nr_pages = 0; - m->page_alignment = 0; - m->pages = NULL; - m->pagelist = NULL; - m->bio = NULL; - m->bio_iter = NULL; - m->bio_seg = 0; - m->trail = NULL; - dout("ceph_msg_new %p front %d\n", m, front_len); return m; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5a80f41c0cba..6b5dda1cb5df 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -470,8 +470,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, snapc, ops, use_mempool, GFP_NOFS, NULL, NULL); - if (IS_ERR(req)) - return req; + if (!req) + return NULL; /* calculate max write size */ calc_layout(osdc, vino, layout, off, plen, req, ops); diff --git a/net/core/dev.c b/net/core/dev.c index 856b6ee9a1d5..92009440d28b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1284,11 +1284,13 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - LIST_HEAD(single); + if (dev->flags & IFF_UP) { + LIST_HEAD(single); - list_add(&dev->unreg_list, &single); - dev_close_many(&single); - list_del(&single); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); + list_del(&single); + } return 0; } EXPORT_SYMBOL(dev_close); diff --git a/net/dccp/options.c b/net/dccp/options.c index f06ffcfc8d71..4b2ab657ac8e 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ break; + if (len == 0) + goto out_invalid_option; rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, *value, value + 1, len - 1); if (rc) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a1151b8adf3c..b1d282f11be7 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -223,31 +223,30 @@ static void ip_expire(unsigned long arg) if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; + const struct iphdr *iph; + int err; rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out_rcu_unlock; + /* skb dst is stale, drop it, and perform route lookup again */ + skb_dst_drop(head); + iph = ip_hdr(head); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (err) + goto out_rcu_unlock; + /* - * Only search router table for the head fragment, - * when defraging timeout at PRE_ROUTING HOOK. + * Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. */ - if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { - const struct iphdr *iph = ip_hdr(head); - int err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); - if (unlikely(err)) - goto out_rcu_unlock; - - /* - * Only an end host needs to send an ICMP - * "Fragment Reassembly Timeout" message, per RFC792. - */ - if (skb_rtable(head)->rt_type != RTN_LOCAL) - goto out_rcu_unlock; + if (qp->user == IP_DEFRAG_CONNTRACK_IN && + skb_rtable(head)->rt_type != RTN_LOCAL) + goto out_rcu_unlock; - } /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 34340c9c95fa..f376b05cca81 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -93,6 +93,7 @@ struct bictcp { u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ #define ACK_RATIO_SHIFT 4 +#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ @@ -398,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) u32 delay; if (icsk->icsk_ca_state == TCP_CA_Open) { - cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ca->delayed_ack += cnt; + u32 ratio = ca->delayed_ack; + + ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; + ratio += cnt; + + ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without timetamps */ diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 571aa96a175c..2d51840e53a1 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -static int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER if (!skb_dst(skb)->xfrm) { @@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, skb_dst(skb)->dev, xfrm4_output_finish, + NULL, dst->dev, + x->outer_mode->afinfo->output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1717c64628d1..805d63ef4340 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -78,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_tempsel = __xfrm4_init_tempsel, .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, + .output_finish = xfrm4_output_finish, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 28e74488a329..a5a4c5dd5396 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -45,6 +45,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; struct dst_entry *dst = NULL; u8 proto; struct flowi6 fl6; @@ -124,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - ip6h->version = 6; + *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8e688b3de9ab..49a91c5f5623 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -79,7 +79,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -static int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; @@ -97,9 +97,9 @@ static int __xfrm6_output(struct sk_buff *skb) if ((x && x->props.mode == XFRM_MODE_TUNNEL) && ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, xfrm6_output_finish); + return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); } - return xfrm6_output_finish(skb); + return x->outer_mode->afinfo->output_finish(skb); } int xfrm6_output(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index afe941e9415c..248f0b2a7ee9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -178,6 +178,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .output_finish = xfrm6_output_finish, .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 2dc6de13ac18..51f3af7c4743 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -static int __net_init __ip_vs_app_init(struct net *net) +int __net_init __ip_vs_app_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net) return 0; } -static void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit __ip_vs_app_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_app_init, - .exit = __ip_vs_app_cleanup, -}; - int __init ip_vs_app_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_app_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c97bd45975be..d3fd91bbba49 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -static void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit __ip_vs_conn_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } -static struct pernet_operations ipvs_conn_ops = { - .init = __ip_vs_conn_init, - .exit = __ip_vs_conn_cleanup, -}; int __init ip_vs_conn_init(void) { int idx; - int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - retc = register_pernet_subsys(&ipvs_conn_ops); - /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return retc; + return 0; } void ip_vs_conn_cleanup(void) { - unregister_pernet_subsys(&ipvs_conn_ops); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 07accf6b2401..a74dae6c5dbc 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1113,6 +1113,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1343,6 +1346,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return NF_ACCEPT; /* The packet looks wrong, ignore */ net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1529,6 +1533,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); return NF_ACCEPT; } + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* Bad... Do not break raw sockets */ @@ -1562,7 +1571,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - net = skb_net(skb); /* Protocol supported? */ pd = ip_vs_proto_data_get(net, iph.protocol); if (unlikely(!pd)) @@ -1588,7 +1596,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - net = skb_net(skb); ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { @@ -1743,10 +1750,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp(skb, &r, hooknum); } @@ -1757,10 +1770,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp_v6(skb, &r, hooknum); } #endif @@ -1884,19 +1903,70 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + /* Hold the beast until a service is registerd */ + ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; + + if (__ip_vs_estimator_init(net) < 0) + goto estimator_fail; + + if (__ip_vs_control_init(net) < 0) + goto control_fail; + + if (__ip_vs_protocol_init(net) < 0) + goto protocol_fail; + + if (__ip_vs_app_init(net) < 0) + goto app_fail; + + if (__ip_vs_conn_init(net) < 0) + goto conn_fail; + + if (__ip_vs_sync_init(net) < 0) + goto sync_fail; + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; +/* + * Error handling + */ + +sync_fail: + __ip_vs_conn_cleanup(net); +conn_fail: + __ip_vs_app_cleanup(net); +app_fail: + __ip_vs_protocol_cleanup(net); +protocol_fail: + __ip_vs_control_cleanup(net); +control_fail: + __ip_vs_estimator_cleanup(net); +estimator_fail: + return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { - IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); + __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ + __ip_vs_conn_cleanup(net); + __ip_vs_app_cleanup(net); + __ip_vs_protocol_cleanup(net); + __ip_vs_control_cleanup(net); + __ip_vs_estimator_cleanup(net); + IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); +} + +static void __net_exit __ip_vs_dev_cleanup(struct net *net) +{ + EnterFunction(2); + net_ipvs(net)->enable = 0; /* Disable packet reception */ + __ip_vs_sync_cleanup(net); + LeaveFunction(2); } static struct pernet_operations ipvs_core_ops = { @@ -1906,6 +1976,10 @@ static struct pernet_operations ipvs_core_ops = { .size = sizeof(struct netns_ipvs), }; +static struct pernet_operations ipvs_core_dev_ops = { + .exit = __ip_vs_dev_cleanup, +}; + /* * Initialize IP Virtual Server */ @@ -1913,10 +1987,6 @@ static int __init ip_vs_init(void) { int ret; - ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ - if (ret < 0) - return ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { @@ -1944,15 +2014,28 @@ static int __init ip_vs_init(void) goto cleanup_conn; } + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + goto cleanup_sync; + + ret = register_pernet_device(&ipvs_core_dev_ops); + if (ret < 0) + goto cleanup_sub; + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_sync; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); + return ret; +cleanup_dev: + unregister_pernet_device(&ipvs_core_dev_ops); +cleanup_sub: + unregister_pernet_subsys(&ipvs_core_ops); cleanup_sync: ip_vs_sync_cleanup(); cleanup_conn: @@ -1964,20 +2047,20 @@ cleanup_sync: ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + unregister_pernet_device(&ipvs_core_dev_ops); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ae47090bf45f..ea722810faf3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void) } #endif + +/* Protos */ +static void __ip_vs_del_service(struct ip_vs_service *svc); + + #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ static int __ip_vs_addr_is_local_v6(struct net *net, @@ -1214,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; + /* Now there is a service - full throttle */ + ipvs->enable = 1; return 0; @@ -1472,6 +1479,84 @@ static int ip_vs_flush(struct net *net) return 0; } +/* + * Delete service by {netns} in the service table. + * Called by __ip_vs_cleanup() + */ +void __ip_vs_service_cleanup(struct net *net) +{ + EnterFunction(2); + /* Check for "full" addressed entries */ + mutex_lock(&__ip_vs_mutex); + ip_vs_flush(net); + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); +} +/* + * Release dst hold by dst_cache + */ +static inline void +__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +{ + spin_lock_bh(&dest->dst_lock); + if (dest->dst_cache && dest->dst_cache->dev == dev) { + IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", + dev->name, + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + ip_vs_dst_reset(dest); + } + spin_unlock_bh(&dest->dst_lock); + +} +/* + * Netdev event receiver + * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to + * a device that is "unregister" it must be released. + */ +static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct net *net = dev_net(dev); + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + unsigned int idx; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + EnterFunction(2); + mutex_lock(&__ip_vs_mutex); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + } + + list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + + } + } + + list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + __ip_vs_dev_reset(dest, dev); + } + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); + return NOTIFY_DONE; +} /* * Zero counters in a service or all services @@ -3588,6 +3673,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } #endif +static struct notifier_block ip_vs_dst_notifier = { + .notifier_call = ip_vs_dst_event, +}; + int __net_init __ip_vs_control_init(struct net *net) { int idx; @@ -3626,7 +3715,7 @@ err: return -ENOMEM; } -static void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit __ip_vs_control_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3639,11 +3728,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -static struct pernet_operations ipvs_control_ops = { - .init = __ip_vs_control_init, - .exit = __ip_vs_control_cleanup, -}; - int __init ip_vs_control_init(void) { int idx; @@ -3657,33 +3741,32 @@ int __init ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) { - pr_err("cannot register namespace.\n"); - goto err; - } - smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - goto err_net; + goto err_sock; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); - nf_unregister_sockopt(&ip_vs_sockopts); - goto err_net; + goto err_genl; } + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + goto err_notf; + LeaveFunction(2); return 0; -err_net: - unregister_pernet_subsys(&ipvs_control_ops); -err: +err_notf: + ip_vs_genl_unregister(); +err_genl: + nf_unregister_sockopt(&ip_vs_sockopts); +err_sock: return ret; } @@ -3691,7 +3774,6 @@ err: void ip_vs_control_cleanup(void) { EnterFunction(2); - unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 8c8766ca56ad..508cce98777c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -static int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -static void __net_exit __ip_vs_estimator_exit(struct net *net) +void __net_exit __ip_vs_estimator_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_estimator_init, - .exit = __ip_vs_estimator_exit, -}; int __init ip_vs_estimator_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_estimator_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 17484a4416ef..eb86028536fc 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -static int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init __ip_vs_protocol_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit __ip_vs_protocol_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; @@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net) } } -static struct pernet_operations ipvs_proto_ops = { - .init = __ip_vs_protocol_init, - .exit = __ip_vs_protocol_cleanup, -}; - int __init ip_vs_protocol_init(void) { char protocols[64]; @@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); - return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; - unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3e7961e85e9c..e292e5bddc70 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net) struct socket *sock; int result; - /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + /* First create a socket move it to right name space later */ + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net) int result; /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = 1; @@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data) ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo); return 0; @@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo->buf); kfree(tinfo); @@ -1601,7 +1611,7 @@ outtinfo: outbuf: kfree(buf); outsocket: - sock_release(sock); + sk_release_kernel(sock->sk); out: return result; } @@ -1610,6 +1620,7 @@ out: int stop_sync_thread(struct net *net, int state) { struct netns_ipvs *ipvs = net_ipvs(net); + int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); @@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state) spin_lock_bh(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; spin_unlock_bh(&ipvs->sync_lock); - kthread_stop(ipvs->master_thread); + retc = kthread_stop(ipvs->master_thread); ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { if (!ipvs->backup_thread) @@ -1639,22 +1650,20 @@ int stop_sync_thread(struct net *net, int state) task_pid_nr(ipvs->backup_thread)); ipvs->sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(ipvs->backup_thread); + retc = kthread_stop(ipvs->backup_thread); ipvs->backup_thread = NULL; - } else { - return -EINVAL; } /* decrease the module use count */ ip_vs_use_count_dec(); - return 0; + return retc; } /* * Initialize data struct for each netns */ -static int __net_init __ip_vs_sync_init(struct net *net) +int __net_init __ip_vs_sync_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1668,24 +1677,24 @@ static int __net_init __ip_vs_sync_init(struct net *net) return 0; } -static void __ip_vs_sync_cleanup(struct net *net) +void __ip_vs_sync_cleanup(struct net *net) { - stop_sync_thread(net, IP_VS_STATE_MASTER); - stop_sync_thread(net, IP_VS_STATE_BACKUP); -} + int retc; -static struct pernet_operations ipvs_sync_ops = { - .init = __ip_vs_sync_init, - .exit = __ip_vs_sync_cleanup, -}; + retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Master Daemon\n"); + retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Backup Daemon\n"); +} int __init ip_vs_sync_init(void) { - return register_pernet_subsys(&ipvs_sync_ops); + return 0; } void ip_vs_sync_cleanup(void) { - unregister_pernet_subsys(&ipvs_sync_ops); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 30bf8a167fc8..482e90c61850 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1334,6 +1334,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conn *ct; int err = -EINVAL; struct nf_conntrack_helper *helper; + struct nf_conn_tstamp *tstamp; ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) @@ -1451,6 +1452,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) + tstamp->start = ktime_to_ns(ktime_get_real()); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a9adf4c6b299..8a025a585d2f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -455,6 +455,7 @@ void xt_compat_flush_offsets(u_int8_t af) vfree(xt[af].compat_tab); xt[af].compat_tab = NULL; xt[af].number = 0; + xt[af].cur = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); @@ -473,8 +474,7 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset) else return mid ? tmp[mid - 1].delta : 0; } - WARN_ON_ONCE(1); - return 0; + return left ? tmp[left - 1].delta : 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 0a229191e55b..ae8271652efa 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) u_int8_t orig, nv; orig = ipv6_get_dsfield(iph); - nv = (orig & info->tos_mask) ^ info->tos_value; + nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { if (!skb_make_writable(skb, sizeof(struct iphdr))) diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 481a86fdc409..61805d7b38aa 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -272,11 +272,6 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - if (strcmp(par->table, "raw") == 0) { - pr_info("state is undetermined at the time of raw table\n"); - return -EINVAL; - } - ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 15792d8b6272..b4d745ea8ee1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1406,6 +1406,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; + struct xfrm_mode *inner_mode; struct dst_entry *dst_prev = NULL; struct dst_entry *dst0 = NULL; int i = 0; @@ -1436,6 +1437,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (xfrm[i]->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(xfrm[i], + xfrm_af2proto(family)); + if (!inner_mode) { + err = -EAFNOSUPPORT; + dst_release(dst); + goto put_states; + } + } else + inner_mode = xfrm[i]->inner_mode; + if (!dst_prev) dst0 = dst1; else { @@ -1464,7 +1476,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = xfrm[i]->outer_mode->afinfo->output; + dst1->output = inner_mode->afinfo->output; dst1->next = dst_prev; dst_prev = dst1; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index e8a781422feb..47f1b8638df9 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -535,6 +535,9 @@ int xfrm_init_replay(struct xfrm_state *x) replay_esn->bmp_len * sizeof(__u32) * 8) return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0) + return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) x->repl = &xfrm_replay_esn; else diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f7cf0ea6faea..8fb248843009 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1578,7 +1578,8 @@ static int may_create(struct inode *dir, return rc; if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { - rc = security_transition_sid(sid, dsec->sid, tclass, NULL, &newsid); + rc = security_transition_sid(sid, dsec->sid, tclass, + &dentry->d_name, &newsid); if (rc) return rc; } diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e7b850ad57ee..e6e7ce0d3d55 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -502,7 +502,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->type_val_to_struct_array, 0, - p->p_types.nprim - 1, GFP_KERNEL | __GFP_ZERO); + p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -519,7 +519,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->sym_val_to_name[i], - 0, p->symtab[i].nprim - 1, + 0, p->symtab[i].nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -2375,7 +2375,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; /* preallocate so we don't have to worry about the put ever failing */ - rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim - 1, + rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto bad; diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index a5af834c8ef5..4ddc6d3b6678 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -434,17 +434,21 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai, mcasp_set_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE); mcasp_set_bits(base + DAVINCI_MCASP_RXFMCTL_REG, AFSRE); - mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG, (0x7 << 26)); + mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG, + ACLKX | AHCLKX | AFSX); break; case SND_SOC_DAIFMT_CBM_CFS: /* codec is clock master and frame slave */ - mcasp_set_bits(base + DAVINCI_MCASP_ACLKXCTL_REG, ACLKXE); + mcasp_clr_bits(base + DAVINCI_MCASP_ACLKXCTL_REG, ACLKXE); mcasp_set_bits(base + DAVINCI_MCASP_TXFMCTL_REG, AFSXE); - mcasp_set_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE); + mcasp_clr_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE); mcasp_set_bits(base + DAVINCI_MCASP_RXFMCTL_REG, AFSRE); - mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG, (0x2d << 26)); + mcasp_clr_bits(base + DAVINCI_MCASP_PDIR_REG, + ACLKX | ACLKR); + mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG, + AFSX | AFSR); break; case SND_SOC_DAIFMT_CBM_CFM: /* codec is clock and frame master */ @@ -454,7 +458,8 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai, mcasp_clr_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE); mcasp_clr_bits(base + DAVINCI_MCASP_RXFMCTL_REG, AFSRE); - mcasp_clr_bits(base + DAVINCI_MCASP_PDIR_REG, (0x3f << 26)); + mcasp_clr_bits(base + DAVINCI_MCASP_PDIR_REG, + ACLKX | AHCLKX | AFSX | ACLKR | AHCLKR | AFSR); break; default: @@ -644,7 +649,7 @@ static void davinci_hw_param(struct davinci_audio_dev *dev, int stream) mcasp_set_reg(dev->base + DAVINCI_MCASP_TXTDM_REG, mask); mcasp_set_bits(dev->base + DAVINCI_MCASP_TXFMT_REG, TXORD); - if ((dev->tdm_slots >= 2) || (dev->tdm_slots <= 32)) + if ((dev->tdm_slots >= 2) && (dev->tdm_slots <= 32)) mcasp_mod_bits(dev->base + DAVINCI_MCASP_TXFMCTL_REG, FSXMOD(dev->tdm_slots), FSXMOD(0x1FF)); else @@ -660,7 +665,7 @@ static void davinci_hw_param(struct davinci_audio_dev *dev, int stream) AHCLKRE); mcasp_set_reg(dev->base + DAVINCI_MCASP_RXTDM_REG, mask); - if ((dev->tdm_slots >= 2) || (dev->tdm_slots <= 32)) + if ((dev->tdm_slots >= 2) && (dev->tdm_slots <= 32)) mcasp_mod_bits(dev->base + DAVINCI_MCASP_RXFMCTL_REG, FSRMOD(dev->tdm_slots), FSRMOD(0x1FF)); else diff --git a/sound/soc/samsung/goni_wm8994.c b/sound/soc/samsung/goni_wm8994.c index f6b3a3ce5919..0e80daee8b6f 100644 --- a/sound/soc/samsung/goni_wm8994.c +++ b/sound/soc/samsung/goni_wm8994.c @@ -236,18 +236,18 @@ static struct snd_soc_dai_link goni_dai[] = { .name = "WM8994", .stream_name = "WM8994 HiFi", .cpu_dai_name = "samsung-i2s.0", - .codec_dai_name = "wm8994-hifi", + .codec_dai_name = "wm8994-aif1", .platform_name = "samsung-audio", - .codec_name = "wm8994-codec.0-0x1a", + .codec_name = "wm8994-codec.0-001a", .init = goni_wm8994_init, .ops = &goni_hifi_ops, }, { .name = "WM8994 Voice", .stream_name = "Voice", .cpu_dai_name = "goni-voice-dai", - .codec_dai_name = "wm8994-voice", + .codec_dai_name = "wm8994-aif2", .platform_name = "samsung-audio", - .codec_name = "wm8994-codec.0-0x1a", + .codec_name = "wm8994-codec.0-001a", .ops = &goni_voice_ops, }, }; diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 207dee5c5b16..0c542563ea6c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -35,15 +35,21 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ ) +CC = $(CROSS_COMPILE)gcc +AR = $(CROSS_COMPILE)ar + # Additional ARCH settings for x86 ifeq ($(ARCH),i386) ARCH := x86 endif ifeq ($(ARCH),x86_64) - RAW_ARCH := x86_64 - ARCH := x86 - ARCH_CFLAGS := -DARCH_X86_64 - ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S + ARCH := x86 + IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1) + ifeq (${IS_X86_64}, 1) + RAW_ARCH := x86_64 + ARCH_CFLAGS := -DARCH_X86_64 + ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S + endif endif # @@ -119,8 +125,6 @@ lib = lib export prefix bindir sharedir sysconfdir -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar RM = rm -f MKDIR = mkdir FIND = find diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 0b9df8303dcf..8df1ca104a7f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -167,7 +167,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", - entry->fields.dest, entry->fields.dest_mode, + entry->fields.dest_id, entry->fields.dest_mode, entry->fields.delivery_mode, entry->fields.vector, entry->fields.trig_mode); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6330653480e4..ed3c4e7c1008 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -648,7 +648,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; - if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) + /* We can read the guest memory with __xxx_user() later on. */ + if (user_alloc && + ((mem->userspace_addr & (PAGE_SIZE - 1)) || + !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size))) goto out; if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) goto out; @@ -996,23 +999,6 @@ out: return size; } -int memslot_id(struct kvm *kvm, gfn_t gfn) -{ - int i; - struct kvm_memslots *slots = kvm_memslots(kvm); - struct kvm_memory_slot *memslot = NULL; - - for (i = 0; i < slots->nmemslots; ++i) { - memslot = &slots->memslots[i]; - - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - break; - } - - return memslot - slots->memslots; -} - static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages) { @@ -1300,7 +1286,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; - r = copy_from_user(data, (void __user *)addr + offset, len); + r = __copy_from_user(data, (void __user *)addr + offset, len); if (r) return -EFAULT; return 0; |