diff options
35 files changed, 549 insertions, 210 deletions
diff --git a/Documentation/ABI/testing/ppc-memtrace b/Documentation/ABI/testing/ppc-memtrace index 2e8b93741270..9606aed33137 100644 --- a/Documentation/ABI/testing/ppc-memtrace +++ b/Documentation/ABI/testing/ppc-memtrace @@ -13,10 +13,11 @@ Contact: [email protected] Description: Write an integer containing the size in bytes of the memory you want removed from each NUMA node to this file - it must be aligned to the memblock size. This amount of RAM will be removed - from the kernel mappings and the following debugfs files will be - created. This can only be successfully done once per boot. Once - memory is successfully removed from each node, the following - files are created. + from each NUMA node in the kernel mappings and the following + debugfs files will be created. Once memory is successfully + removed from each node, the following files are created. To + re-add memory to the kernel, echo 0 into this file (it will be + automatically onlined). What: /sys/kernel/debug/powerpc/memtrace/<node-id> Date: Aug 2017 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 828efa7c2a35..8397c7bd5880 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -237,6 +237,7 @@ endif cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 +cpu-as-$(CONFIG_E500) += -Wa,-me500 cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 70fdc5b9b9fb..1f4691ce4126 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -146,6 +146,7 @@ void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ extern s32 patch__call_flush_count_cache; extern s32 patch__flush_count_cache_return; +extern s32 patch__memset_nocache, patch__memcpy_nocache; extern long flush_count_cache; diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index c81793d47af9..f82ee8a3b561 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -137,10 +137,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); shift = mmu_psize_defs[psize].shift; \ for (index = 0; vpn < __end; index++, \ vpn += (1L << (shift - VPN_SHIFT))) { \ - if (!__split || __rpte_sub_valid(rpte, index)) \ - do { + if (!__split || __rpte_sub_valid(rpte, index)) -#define pte_iterate_hashed_end() } while(0); } } while(0) +#define pte_iterate_hashed_end() } } while(0) #define pte_pagesize_index(mm, addr, pte) \ (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2f74bdc805e0..b3520b549cba 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -497,6 +497,9 @@ extern void hpte_init_native(void); extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); +void slb_flush_all_realmode(void); +void __slb_restore_bolted_realmode(void); +void slb_restore_bolted_realmode(void); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 5a23010af600..1e7a33592e29 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -195,9 +195,6 @@ struct fadump_crash_info_header { struct cpumask online_mask; }; -/* Crash memory ranges */ -#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) - struct fad_crash_memory_ranges { unsigned long long base; unsigned long long size; diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 3225eb6402cc..2d00cc530083 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -84,9 +84,6 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, return dir + idx; } -pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, - unsigned long addr, unsigned *shift); - void flush_dcache_icache_hugepage(struct page *page); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 896efa559996..b2f89b621b15 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm( extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif @@ -143,24 +143,33 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) { int c; - c = atomic_dec_if_positive(&mm->context.copros); - - /* Detect imbalance between add and remove */ - WARN_ON(c < 0); - /* - * Need to broadcast a global flush of the full mm before - * decrementing active_cpus count, as the next TLBI may be - * local and the nMMU and/or PSL need to be cleaned up. - * Should be rare enough so that it's acceptable. + * When removing the last copro, we need to broadcast a global + * flush of the full mm, as the next TLBI may be local and the + * nMMU and/or PSL need to be cleaned up. + * + * Both the 'copros' and 'active_cpus' counts are looked at in + * flush_all_mm() to determine the scope (local/global) of the + * TLBIs, so we need to flush first before decrementing + * 'copros'. If this API is used by several callers for the + * same context, it can lead to over-flushing. It's hopefully + * not common enough to be a problem. * * Skip on hash, as we don't know how to do the proper flush * for the time being. Invalidations will remain global if - * used on hash. + * used on hash. Note that we can't drop 'copros' either, as + * it could make some invalidations local with no flush + * in-between. */ - if (c == 0 && radix_enabled()) { + if (radix_enabled()) { flush_all_mm(mm); - dec_mm_active_cpus(mm); + + c = atomic_dec_if_positive(&mm->context.copros); + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + + if (c == 0) + dec_mm_active_cpus(mm); } } #else diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 97ecef697e1b..f0e571b2dc7c 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -49,13 +49,11 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, unsigned int page_size) { - if (tlb->fullmm) - return; - if (!tlb->page_size) tlb->page_size = page_size; else if (tlb->page_size != page_size) { - tlb_flush_mmu(tlb); + if (!tlb->fullmm) + tlb_flush_mmu(tlb); /* * update the page size after flush for the new * mmu_gather. diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 643cfbd5bcb5..bac225bb7f64 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -249,10 +249,17 @@ do { \ } \ } while (0) +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __long_type(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err; \ - unsigned long __gu_val; \ + __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ @@ -266,7 +273,7 @@ do { \ #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT; \ - unsigned long __gu_val = 0; \ + __long_type(*(ptr)) __gu_val = 0; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ @@ -280,7 +287,7 @@ do { \ #define __get_user_nosleep(x, ptr, size) \ ({ \ long __gu_err; \ - unsigned long __gu_val; \ + __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ barrier_nospec(); \ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 07e8396d472b..986ec476fd5d 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -47,8 +47,10 @@ static struct fadump_mem_struct fdm; static const struct fadump_mem_struct *fdm_active; static DEFINE_MUTEX(fadump_mutex); -struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; +struct fad_crash_memory_ranges *crash_memory_ranges; +int crash_memory_ranges_size; int crash_mem_ranges; +int max_crash_mem_ranges; /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -868,38 +870,107 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) return 0; } -static inline void fadump_add_crash_memory(unsigned long long base, - unsigned long long end) +static void free_crash_memory_ranges(void) +{ + kfree(crash_memory_ranges); + crash_memory_ranges = NULL; + crash_memory_ranges_size = 0; + max_crash_mem_ranges = 0; +} + +/* + * Allocate or reallocate crash memory ranges array in incremental units + * of PAGE_SIZE. + */ +static int allocate_crash_memory_ranges(void) { + struct fad_crash_memory_ranges *new_array; + u64 new_size; + + new_size = crash_memory_ranges_size + PAGE_SIZE; + pr_debug("Allocating %llu bytes of memory for crash memory ranges\n", + new_size); + + new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL); + if (new_array == NULL) { + pr_err("Insufficient memory for setting up crash memory ranges\n"); + free_crash_memory_ranges(); + return -ENOMEM; + } + + crash_memory_ranges = new_array; + crash_memory_ranges_size = new_size; + max_crash_mem_ranges = (new_size / + sizeof(struct fad_crash_memory_ranges)); + return 0; +} + +static inline int fadump_add_crash_memory(unsigned long long base, + unsigned long long end) +{ + u64 start, size; + bool is_adjacent = false; + if (base == end) - return; + return 0; + + /* + * Fold adjacent memory ranges to bring down the memory ranges/ + * PT_LOAD segments count. + */ + if (crash_mem_ranges) { + start = crash_memory_ranges[crash_mem_ranges - 1].base; + size = crash_memory_ranges[crash_mem_ranges - 1].size; + + if ((start + size) == base) + is_adjacent = true; + } + if (!is_adjacent) { + /* resize the array on reaching the limit */ + if (crash_mem_ranges == max_crash_mem_ranges) { + int ret; + + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } + start = base; + crash_memory_ranges[crash_mem_ranges].base = start; + crash_mem_ranges++; + } + + crash_memory_ranges[crash_mem_ranges - 1].size = (end - start); pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", - crash_mem_ranges, base, end - 1, (end - base)); - crash_memory_ranges[crash_mem_ranges].base = base; - crash_memory_ranges[crash_mem_ranges].size = end - base; - crash_mem_ranges++; + (crash_mem_ranges - 1), start, end - 1, (end - start)); + return 0; } -static void fadump_exclude_reserved_area(unsigned long long start, +static int fadump_exclude_reserved_area(unsigned long long start, unsigned long long end) { unsigned long long ra_start, ra_end; + int ret = 0; ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; if ((ra_start < end) && (ra_end > start)) { if ((start < ra_start) && (end > ra_end)) { - fadump_add_crash_memory(start, ra_start); - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(start, ra_start); + if (ret) + return ret; + + ret = fadump_add_crash_memory(ra_end, end); } else if (start < ra_start) { - fadump_add_crash_memory(start, ra_start); + ret = fadump_add_crash_memory(start, ra_start); } else if (ra_end < end) { - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(ra_end, end); } } else - fadump_add_crash_memory(start, end); + ret = fadump_add_crash_memory(start, end); + + return ret; } static int fadump_init_elfcore_header(char *bufp) @@ -939,13 +1010,22 @@ static int fadump_init_elfcore_header(char *bufp) * Traverse through memblock structure and setup crash memory ranges. These * ranges will be used create PT_LOAD program headers in elfcore header. */ -static void fadump_setup_crash_memory_ranges(void) +static int fadump_setup_crash_memory_ranges(void) { struct memblock_region *reg; unsigned long long start, end; + int ret; pr_debug("Setup crash memory ranges.\n"); crash_mem_ranges = 0; + + /* allocate memory for crash memory ranges for the first time */ + if (!max_crash_mem_ranges) { + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } + /* * add the first memory chunk (RMA_START through boot_memory_size) as * a separate memory chunk. The reason is, at the time crash firmware @@ -953,7 +1033,9 @@ static void fadump_setup_crash_memory_ranges(void) * specified during fadump registration. We need to create a separate * program header for this chunk with the correct offset. */ - fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + if (ret) + return ret; for_each_memblock(memory, reg) { start = (unsigned long long)reg->base; @@ -973,8 +1055,12 @@ static void fadump_setup_crash_memory_ranges(void) } /* add this range excluding the reserved dump area. */ - fadump_exclude_reserved_area(start, end); + ret = fadump_exclude_reserved_area(start, end); + if (ret) + return ret; } + + return 0; } /* @@ -1097,6 +1183,7 @@ static int register_fadump(void) { unsigned long addr; void *vaddr; + int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1105,7 +1192,9 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - fadump_setup_crash_memory_ranges(); + ret = fadump_setup_crash_memory_ranges(); + if (ret) + return ret; addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len); /* Initialize fadump crash info header. */ @@ -1183,6 +1272,7 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fadump_unregister_dump(&fdm); + free_crash_memory_ranges(); } } diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index b107e3df7f8f..7f5ac2e8581b 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -146,7 +146,9 @@ power9_restore_additional_sprs: mtspr SPRN_MMCR1, r4 ld r3, STOP_MMCR2(r13) + ld r4, PACA_SPRG_VDSO(r13) mtspr SPRN_MMCR2, r3 + mtspr SPRN_SPRG3, r4 blr /* diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 0bd23dc789a4..c77e95e9b384 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -269,18 +269,14 @@ int setup_new_fdt(const struct kimage *image, void *fdt, ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-start", initrd_load_addr); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret < 0) + goto err; /* initrd-end is the first address after the initrd image. */ ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", initrd_load_addr + initrd_len); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret < 0) + goto err; ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); if (ret) { @@ -292,10 +288,8 @@ int setup_new_fdt(const struct kimage *image, void *fdt, if (cmdline != NULL) { ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret < 0) + goto err; } else { ret = fdt_delprop(fdt, chosen_node, "bootargs"); if (ret && ret != -FDT_ERR_NOTFOUND) { @@ -311,10 +305,12 @@ int setup_new_fdt(const struct kimage *image, void *fdt, } ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); - if (ret) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret) + goto err; return 0; + +err: + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index d6756af6ec78..3497c8329c1d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -62,11 +62,8 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) #ifdef CONFIG_PPC_BOOK3S_64 static void flush_and_reload_slb(void) { - struct slb_shadow *slb; - unsigned long i, n; - /* Invalidate all SLBs */ - asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + slb_flush_all_realmode(); #ifdef CONFIG_KVM_BOOK3S_HANDLER /* @@ -76,22 +73,17 @@ static void flush_and_reload_slb(void) if (get_paca()->kvm_hstate.in_guest) return; #endif - - /* For host kernel, reload the SLBs from shadow SLB buffer. */ - slb = get_slb_shadow(); - if (!slb) + if (early_radix_enabled()) return; - n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); - - /* Load up the SLB entries from shadow SLB */ - for (i = 0; i < n; i++) { - unsigned long rb = be64_to_cpu(slb->save_area[i].esid); - unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); + /* + * This probably shouldn't happen, but it may be possible it's + * called in early boot before SLB shadows are allocated. + */ + if (!get_slb_shadow()) + return; - rb = (rb & ~0xFFFul) | i; - asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); - } + slb_restore_bolted_realmode(); } #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0433bf24a10d..9b38a2e5dd35 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -628,7 +628,7 @@ static void __init early_cmdline_parse(void) const char *opt; char *p; - int l = 0; + int l __maybe_unused = 0; prom_cmd_line[0] = 0; p = prom_cmd_line; @@ -1421,7 +1421,10 @@ static void __init reserve_mem(u64 base, u64 size) static void __init prom_init_mem(void) { phandle node; - char *path, type[64]; +#ifdef DEBUG_PROM + char *path; +#endif + char type[64]; unsigned int plen; cell_t *p, *endp; __be32 val; @@ -1442,7 +1445,9 @@ static void __init prom_init_mem(void) prom_debug("root_size_cells: %x\n", rsc); prom_debug("scanning memory:\n"); +#ifdef DEBUG_PROM path = prom_scratch; +#endif for (node = 0; prom_next_node(&node); ) { type[0] = 0; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 0e3743343280..8c507be12c3c 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -43,6 +43,8 @@ #include <asm/kdump.h> #include <asm/feature-fixups.h> +#include "setup.h" + #define DBG(fmt...) extern void bootx_init(unsigned long r4, unsigned long phys); @@ -97,11 +99,10 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * We do the initial parsing of the flat device-tree and prepares * for the MMU to be fully initialized. */ -extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ - notrace void __init machine_init(u64 dt_ptr) { - unsigned int *addr = &memset_nocache_branch; + unsigned int *addr = (unsigned int *)((unsigned long)&patch__memset_nocache + + patch__memset_nocache); unsigned long insn; /* Configure static keys first, now that we're relocated. */ @@ -110,7 +111,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP); insn = create_cond_branch(addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 33b30d901381..e8afdd4f4814 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -449,7 +449,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ return H_TOO_HARD; - if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) + if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) return H_HARDWARE; if (mm_iommu_mapped_inc(mem)) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 757976b3f640..506a4d400458 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -275,7 +275,8 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (!mem) return H_TOO_HARD; - if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) + if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, + &hpa))) return H_HARDWARE; if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) @@ -461,7 +462,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); if (mem) - prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0; + prereg = mm_iommu_ua_to_hpa_rm(mem, ua, + IOMMU_PAGE_SHIFT_4K, &tces) == 0; } if (!prereg) { diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index da425bb6b369..ba66846fe973 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -13,6 +13,7 @@ #include <asm/errno.h> #include <asm/ppc_asm.h> #include <asm/export.h> +#include <asm/code-patching-asm.h> #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -107,8 +108,8 @@ _GLOBAL(memset) * Skip optimised bloc until cache is enabled. Will be replaced * by 'bne' during boot to use normal procedure if r4 is not zero */ -_GLOBAL(memset_nocache_branch) - b 2f +5: b 2f + patch_site 5b, patch__memset_nocache clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 @@ -168,7 +169,9 @@ _GLOBAL(memmove) /* fall through */ _GLOBAL(memcpy) - b generic_memcpy +1: b generic_memcpy + patch_site 1b, patch__memcpy_nocache + add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index 668e87d03f9e..82a0e37557a5 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(kmap_atomic_prot); void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; + int type __maybe_unused; if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index abb43646927a..a4ca57612558 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/swap.h> #include <asm/mmu_context.h> +#include <asm/pte-walk.h> static DEFINE_MUTEX(mem_list_mutex); @@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t { struct rcu_head rcu; unsigned long used; atomic64_t mapped; + unsigned int pageshift; u64 ua; /* userspace address */ u64 entries; /* number of entries in hpas[] */ u64 *hpas; /* vmalloc'ed */ @@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; + unsigned int pageshift; + unsigned long flags; struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, goto unlock_exit; } + /* + * For a starting point for a maximum page size calculation + * we use @ua and @entries natural alignment to allow IOMMU pages + * smaller than huge pages but still bigger than PAGE_SIZE. + */ + mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT)); mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0]))); if (!mem->hpas) { kfree(mem); @@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } } populate: + pageshift = PAGE_SHIFT; + if (PageCompound(page)) { + pte_t *pte; + struct page *head = compound_head(page); + unsigned int compshift = compound_order(head); + + local_irq_save(flags); /* disables as well */ + pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift); + local_irq_restore(flags); + + /* Double check it is still the same pinned page */ + if (pte && pte_page(*pte) == head && + pageshift == compshift) + pageshift = max_t(unsigned int, pageshift, + PAGE_SHIFT); + } + mem->pageshift = min(mem->pageshift, pageshift); mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } @@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, EXPORT_SYMBOL_GPL(mm_iommu_find); long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; u64 *va = &mem->hpas[entry]; @@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + *hpa = *va | (ua & ~PAGE_MASK); return 0; @@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; void *va = &mem->hpas[entry]; @@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + pa = (void *) vmalloc_to_phys(va); if (!pa) return -EFAULT; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index cb796724a6fc..0b095fa54049 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -90,6 +90,45 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } +/* + * Insert bolted entries into SLB (which may not be empty, so don't clear + * slb_cache_ptr). + */ +void __slb_restore_bolted_realmode(void) +{ + struct slb_shadow *p = get_slb_shadow(); + enum slb_index index; + + /* No isync needed because realmode. */ + for (index = 0; index < SLB_NUM_BOLTED; index++) { + asm volatile("slbmte %0,%1" : + : "r" (be64_to_cpu(p->save_area[index].vsid)), + "r" (be64_to_cpu(p->save_area[index].esid))); + } +} + +/* + * Insert the bolted entries into an empty SLB. + * This is not the same as rebolt because the bolted segments are not + * changed, just loaded from the shadow area. + */ +void slb_restore_bolted_realmode(void) +{ + __slb_restore_bolted_realmode(); + get_paca()->slb_cache_ptr = 0; +} + +/* + * This flushes all SLB entries including 0, so it must be realmode. + */ +void slb_flush_all_realmode(void) +{ + /* + * This flushes all SLB entries including 0, so it must be realmode. + */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); +} + static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index df762bb3c735..04527d13d5a4 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -781,12 +781,12 @@ static int __init pmac_add_bridge(struct device_node *dev) struct resource rsrc; char *disp_name; const int *bus_range; - int primary = 1, has_address = 0; + int primary = 1; DBG("Adding PCI host bridge %pOF\n", dev); /* Fetch host bridge registers address */ - has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); + of_address_to_resource(dev, 0, &rsrc); /* Get bus range if any */ bus_range = of_get_property(dev, "bus-range", &len); @@ -904,7 +904,7 @@ static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge) void __init pmac_pci_init(void) { struct device_node *np, *root; - struct device_node *ht = NULL; + struct device_node *ht __maybe_unused = NULL; pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN); @@ -1019,7 +1019,7 @@ static bool pmac_pci_enable_device_hook(struct pci_dev *dev) return true; } -void pmac_pci_fixup_ohci(struct pci_dev *dev) +static void pmac_pci_fixup_ohci(struct pci_dev *dev) { struct device_node *node = pci_device_to_OF_node(dev); @@ -1054,7 +1054,7 @@ void __init pmac_pcibios_after_init(void) } } -void pmac_pci_fixup_cardbus(struct pci_dev* dev) +static void pmac_pci_fixup_cardbus(struct pci_dev *dev) { if (!machine_is(powermac)) return; @@ -1091,7 +1091,7 @@ void pmac_pci_fixup_cardbus(struct pci_dev* dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); -void pmac_pci_fixup_pciata(struct pci_dev* dev) +static void pmac_pci_fixup_pciata(struct pci_dev *dev) { u8 progif = 0; diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 12e6e4d30602..f92c1918fb56 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -34,6 +34,8 @@ #include <asm/nvram.h> #include <asm/smu.h> +#include "pmac.h" + #undef DEBUG #ifdef DEBUG @@ -249,7 +251,7 @@ int pmac_set_rtc_time(struct rtc_time *tm) * Calibrate the decrementer register using VIA timer 1. * This is used both on powermacs and CHRP machines. */ -int __init via_calibrate_decr(void) +static int __init via_calibrate_decr(void) { struct device_node *vias; volatile unsigned char __iomem *via; diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index d83135a9830e..8901973ed683 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -73,7 +73,7 @@ void udbg_scc_init(int force_scc) struct device_node *stdout = NULL, *escc = NULL, *macio = NULL; struct device_node *ch, *ch_def = NULL, *ch_a = NULL; const char *path; - int i, x; + int i; escc = of_find_node_by_name(NULL, "escc"); if (escc == NULL) @@ -120,7 +120,7 @@ void udbg_scc_init(int force_scc) mb(); for (i = 20000; i != 0; --i) - x = in_8(sccc); + in_8(sccc); out_8(sccc, 0x09); /* reset A or B side */ out_8(sccc, 0xc0); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index ecb002c5db83..35f699ebb662 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -817,7 +817,7 @@ static int pnv_parse_cpuidle_dt(void) goto out; } for (i = 0; i < nr_idle_states; i++) - strncpy(pnv_idle_states[i].name, temp_string[i], + strlcpy(pnv_idle_states[i].name, temp_string[i], PNV_IDLE_NAME_LEN); nr_pnv_idle_states = nr_idle_states; rc = 0; diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index f73101119eba..51dc398ae3f7 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -177,8 +177,11 @@ static int memtrace_init_debugfs(void) snprintf(ent->name, 16, "%08x", ent->nid); dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); - if (!dir) + if (!dir) { + pr_err("Failed to create debugfs directory for node %d\n", + ent->nid); return -1; + } ent->dir = dir; debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); @@ -189,18 +192,93 @@ static int memtrace_init_debugfs(void) return ret; } +static int online_mem_block(struct memory_block *mem, void *arg) +{ + return device_online(&mem->dev); +} + +/* + * Iterate through the chunks of memory we have removed from the kernel + * and attempt to add them back to the kernel. + */ +static int memtrace_online(void) +{ + int i, ret = 0; + struct memtrace_entry *ent; + + for (i = memtrace_array_nr - 1; i >= 0; i--) { + ent = &memtrace_array[i]; + + /* We have onlined this chunk previously */ + if (ent->nid == -1) + continue; + + /* Remove from io mappings */ + if (ent->mem) { + iounmap(ent->mem); + ent->mem = 0; + } + + if (add_memory(ent->nid, ent->start, ent->size)) { + pr_err("Failed to add trace memory to node %d\n", + ent->nid); + ret += 1; + continue; + } + + /* + * If kernel isn't compiled with the auto online option + * we need to online the memory ourselves. + */ + if (!memhp_auto_online) { + walk_memory_range(PFN_DOWN(ent->start), + PFN_UP(ent->start + ent->size - 1), + NULL, online_mem_block); + } + + /* + * Memory was added successfully so clean up references to it + * so on reentry we can tell that this chunk was added. + */ + debugfs_remove_recursive(ent->dir); + pr_info("Added trace memory back to node %d\n", ent->nid); + ent->size = ent->start = ent->nid = -1; + } + if (ret) + return ret; + + /* If all chunks of memory were added successfully, reset globals */ + kfree(memtrace_array); + memtrace_array = NULL; + memtrace_size = 0; + memtrace_array_nr = 0; + return 0; +} + static int memtrace_enable_set(void *data, u64 val) { - if (memtrace_size) + u64 bytes; + + /* + * Don't attempt to do anything if size isn't aligned to a memory + * block or equal to zero. + */ + bytes = memory_block_size_bytes(); + if (val & (bytes - 1)) { + pr_err("Value must be aligned with 0x%llx\n", bytes); return -EINVAL; + } - if (!val) - return -EINVAL; + /* Re-add/online previously removed/offlined memory */ + if (memtrace_size) { + if (memtrace_online()) + return -EAGAIN; + } - /* Make sure size is aligned to a memory block */ - if (val & (memory_block_size_bytes() - 1)) - return -EINVAL; + if (!val) + return 0; + /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) return -EINVAL; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 14a46b07ab2f..851ce326874a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -367,7 +367,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) } savep = __va(regs->gpr[3]); - regs->gpr[3] = savep[0]; /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index eb69a5186243..280e964e1aa8 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -196,7 +196,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, rsrc.end - rsrc.start); + msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b7dd683b0470..4264aedc7775 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -56,6 +56,7 @@ #include <asm/opal.h> #include <asm/firmware.h> #include <asm/code-patching.h> +#include <asm/sections.h> #ifdef CONFIG_PPC64 #include <asm/hvcall.h> @@ -244,6 +245,7 @@ Commands:\n\ f flush cache\n\ la lookup symbol+offset of specified address\n\ ls lookup address of specified symbol\n\ + lp s [#] lookup address of percpu symbol s for current cpu, or cpu #\n\ m examine/change memory\n\ mm move a block of memory\n\ ms set a block of memory\n\ @@ -2733,7 +2735,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - unsigned long inst, last_inst = 0; + unsigned int inst, last_inst = 0; unsigned char val[4]; dotted = 0; @@ -2757,7 +2759,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8lx", adr, inst); + printf(REG" %.8x", adr, inst); printf("\t"); dump_func(inst, adr); printf("\n"); @@ -3352,7 +3354,8 @@ static void symbol_lookup(void) { int type = inchar(); - unsigned long addr; + unsigned long addr, cpu; + void __percpu *ptr = NULL; static char tmp[64]; switch (type) { @@ -3376,6 +3379,34 @@ symbol_lookup(void) catch_memory_errors = 0; termch = 0; break; + case 'p': + getstring(tmp, 64); + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + ptr = (void __percpu *)kallsyms_lookup_name(tmp); + sync(); + } + + if (ptr && + ptr >= (void __percpu *)__per_cpu_start && + ptr < (void __percpu *)__per_cpu_end) + { + if (scanhex(&cpu) && cpu < num_possible_cpus()) { + addr = (unsigned long)per_cpu_ptr(ptr, cpu); + } else { + cpu = raw_smp_processor_id(); + addr = (unsigned long)this_cpu_ptr(ptr); + } + + printf("%s for cpu 0x%lx: %lx\n", tmp, cpu, addr); + } else { + printf("Percpu symbol '%s' not found.\n", tmp); + } + + catch_memory_errors = 0; + termch = 0; + break; } } diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index f5dc740fcd13..3bc0c15d4d85 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -913,11 +913,6 @@ static int afu_properties_look_ok(struct cxl_afu *afu) return -EINVAL; } - if (afu->crs_len < 0) { - dev_err(&afu->dev, "Unexpected configuration record size value\n"); - return -EINVAL; - } - return 0; } diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 7709fcc707f4..5414c4a87bea 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -73,7 +73,7 @@ static LIST_HEAD(hvc_structs); * Protect the list of hvc_struct instances from inserts and removals during * list traversal. */ -static DEFINE_SPINLOCK(hvc_structs_lock); +static DEFINE_MUTEX(hvc_structs_mutex); /* * This value is used to assign a tty->index value to a hvc_struct based @@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(hvc_structs_lock); static int last_hvc = -1; /* - * Do not call this function with either the hvc_structs_lock or the hvc_struct + * Do not call this function with either the hvc_structs_mutex or the hvc_struct * lock held. If successful, this function increments the kref reference * count against the target hvc_struct so it should be released when finished. */ @@ -92,24 +92,46 @@ static struct hvc_struct *hvc_get_by_index(int index) struct hvc_struct *hp; unsigned long flags; - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { spin_lock_irqsave(&hp->lock, flags); if (hp->index == index) { tty_port_get(&hp->port); spin_unlock_irqrestore(&hp->lock, flags); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); return hp; } spin_unlock_irqrestore(&hp->lock, flags); } hp = NULL; + mutex_unlock(&hvc_structs_mutex); - spin_unlock(&hvc_structs_lock); return hp; } +static int __hvc_flush(const struct hv_ops *ops, uint32_t vtermno, bool wait) +{ + if (wait) + might_sleep(); + + if (ops->flush) + return ops->flush(vtermno, wait); + return 0; +} + +static int hvc_console_flush(const struct hv_ops *ops, uint32_t vtermno) +{ + return __hvc_flush(ops, vtermno, false); +} + +/* + * Wait for the console to flush before writing more to it. This sleeps. + */ +static int hvc_flush(struct hvc_struct *hp) +{ + return __hvc_flush(hp->ops, hp->vtermno, true); +} /* * Initial console vtermnos for console API usage prior to full console @@ -156,8 +178,12 @@ static void hvc_console_print(struct console *co, const char *b, if (r <= 0) { /* throw away characters on error * but spin in case of -EAGAIN */ - if (r != -EAGAIN) + if (r != -EAGAIN) { i = 0; + } else { + hvc_console_flush(cons_ops[index], + vtermnos[index]); + } } else if (r > 0) { i -= r; if (i > 0) @@ -165,6 +191,7 @@ static void hvc_console_print(struct console *co, const char *b, } } } + hvc_console_flush(cons_ops[index], vtermnos[index]); } static struct tty_driver *hvc_console_device(struct console *c, int *index) @@ -224,13 +251,13 @@ static void hvc_port_destruct(struct tty_port *port) struct hvc_struct *hp = container_of(port, struct hvc_struct, port); unsigned long flags; - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); spin_lock_irqsave(&hp->lock, flags); list_del(&(hp->next)); spin_unlock_irqrestore(&hp->lock, flags); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); kfree(hp); } @@ -494,23 +521,32 @@ static int hvc_write(struct tty_struct *tty, const unsigned char *buf, int count if (hp->port.count <= 0) return -EIO; - spin_lock_irqsave(&hp->lock, flags); + while (count > 0) { + spin_lock_irqsave(&hp->lock, flags); - /* Push pending writes */ - if (hp->n_outbuf > 0) - hvc_push(hp); - - while (count > 0 && (rsize = hp->outbuf_size - hp->n_outbuf) > 0) { - if (rsize > count) - rsize = count; - memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); - count -= rsize; - buf += rsize; - hp->n_outbuf += rsize; - written += rsize; - hvc_push(hp); + rsize = hp->outbuf_size - hp->n_outbuf; + + if (rsize) { + if (rsize > count) + rsize = count; + memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); + count -= rsize; + buf += rsize; + hp->n_outbuf += rsize; + written += rsize; + } + + if (hp->n_outbuf > 0) + hvc_push(hp); + + spin_unlock_irqrestore(&hp->lock, flags); + + if (count) { + if (hp->n_outbuf > 0) + hvc_flush(hp); + cond_resched(); + } } - spin_unlock_irqrestore(&hp->lock, flags); /* * Racy, but harmless, kick thread if there is still pending data. @@ -590,10 +626,10 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -int hvc_poll(struct hvc_struct *hp) +static int __hvc_poll(struct hvc_struct *hp, bool may_sleep) { struct tty_struct *tty; - int i, n, poll_mask = 0; + int i, n, count, poll_mask = 0; char buf[N_INBUF] __ALIGNED__; unsigned long flags; int read_total = 0; @@ -612,6 +648,12 @@ int hvc_poll(struct hvc_struct *hp) timeout = (written_total) ? 0 : MIN_TIMEOUT; } + if (may_sleep) { + spin_unlock_irqrestore(&hp->lock, flags); + cond_resched(); + spin_lock_irqsave(&hp->lock, flags); + } + /* No tty attached, just skip */ tty = tty_port_tty_get(&hp->port); if (tty == NULL) @@ -619,7 +661,7 @@ int hvc_poll(struct hvc_struct *hp) /* Now check if we can get data (are we throttled ?) */ if (tty_throttled(tty)) - goto throttled; + goto out; /* If we aren't notifier driven and aren't throttled, we always * request a reschedule @@ -628,56 +670,58 @@ int hvc_poll(struct hvc_struct *hp) poll_mask |= HVC_POLL_READ; /* Read data if any */ - for (;;) { - int count = tty_buffer_request_room(&hp->port, N_INBUF); - /* If flip is full, just reschedule a later read */ - if (count == 0) { + count = tty_buffer_request_room(&hp->port, N_INBUF); + + /* If flip is full, just reschedule a later read */ + if (count == 0) { + poll_mask |= HVC_POLL_READ; + goto out; + } + + n = hp->ops->get_chars(hp->vtermno, buf, count); + if (n <= 0) { + /* Hangup the tty when disconnected from host */ + if (n == -EPIPE) { + spin_unlock_irqrestore(&hp->lock, flags); + tty_hangup(tty); + spin_lock_irqsave(&hp->lock, flags); + } else if ( n == -EAGAIN ) { + /* + * Some back-ends can only ensure a certain min + * num of bytes read, which may be > 'count'. + * Let the tty clear the flip buff to make room. + */ poll_mask |= HVC_POLL_READ; - break; } + goto out; + } - n = hp->ops->get_chars(hp->vtermno, buf, count); - if (n <= 0) { - /* Hangup the tty when disconnected from host */ - if (n == -EPIPE) { - spin_unlock_irqrestore(&hp->lock, flags); - tty_hangup(tty); - spin_lock_irqsave(&hp->lock, flags); - } else if ( n == -EAGAIN ) { - /* - * Some back-ends can only ensure a certain min - * num of bytes read, which may be > 'count'. - * Let the tty clear the flip buff to make room. - */ - poll_mask |= HVC_POLL_READ; - } - break; - } - for (i = 0; i < n; ++i) { + for (i = 0; i < n; ++i) { #ifdef CONFIG_MAGIC_SYSRQ - if (hp->index == hvc_console.index) { - /* Handle the SysRq Hack */ - /* XXX should support a sequence */ - if (buf[i] == '\x0f') { /* ^O */ - /* if ^O is pressed again, reset - * sysrq_pressed and flip ^O char */ - sysrq_pressed = !sysrq_pressed; - if (sysrq_pressed) - continue; - } else if (sysrq_pressed) { - handle_sysrq(buf[i]); - sysrq_pressed = 0; + if (hp->index == hvc_console.index) { + /* Handle the SysRq Hack */ + /* XXX should support a sequence */ + if (buf[i] == '\x0f') { /* ^O */ + /* if ^O is pressed again, reset + * sysrq_pressed and flip ^O char */ + sysrq_pressed = !sysrq_pressed; + if (sysrq_pressed) continue; - } + } else if (sysrq_pressed) { + handle_sysrq(buf[i]); + sysrq_pressed = 0; + continue; } -#endif /* CONFIG_MAGIC_SYSRQ */ - tty_insert_flip_char(&hp->port, buf[i], 0); } - - read_total += n; +#endif /* CONFIG_MAGIC_SYSRQ */ + tty_insert_flip_char(&hp->port, buf[i], 0); } - throttled: + if (n == count) + poll_mask |= HVC_POLL_READ; + read_total = n; + + out: /* Wakeup write queue if necessary */ if (hp->do_wakeup) { hp->do_wakeup = 0; @@ -697,6 +741,11 @@ int hvc_poll(struct hvc_struct *hp) return poll_mask; } + +int hvc_poll(struct hvc_struct *hp) +{ + return __hvc_poll(hp, false); +} EXPORT_SYMBOL_GPL(hvc_poll); /** @@ -733,11 +782,12 @@ static int khvcd(void *unused) try_to_freeze(); wmb(); if (!cpus_are_in_xmon()) { - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { - poll_mask |= hvc_poll(hp); + poll_mask |= __hvc_poll(hp, true); + cond_resched(); } - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); } else poll_mask |= HVC_POLL_READ; if (hvc_kicked) @@ -871,7 +921,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, INIT_WORK(&hp->tty_resize, hvc_set_winsz); spin_lock_init(&hp->lock); - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); /* * find index to use: @@ -891,7 +941,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, vtermnos[i] = vtermno; list_add_tail(&(hp->next), &hvc_structs); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); /* check if we need to re-register the kernel console */ hvc_check_console(i); diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h index ea63090e013f..e9319954c832 100644 --- a/drivers/tty/hvc/hvc_console.h +++ b/drivers/tty/hvc/hvc_console.h @@ -54,6 +54,7 @@ struct hvc_struct { struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); + int (*flush)(uint32_t vtermno, bool wait); /* Callbacks for notification. Called in open, close and hangup */ int (*notifier_add)(struct hvc_struct *hp, int irq); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 11a4c194d6e3..96721b154454 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -419,17 +419,17 @@ static void tce_iommu_unuse_page(struct tce_container *container, } static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, - unsigned long tce, unsigned long size, + unsigned long tce, unsigned long shift, unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) { long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(container->mm, tce, size); + mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift); if (!mem) return -EINVAL; - ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa); if (ret) return -EINVAL; @@ -450,7 +450,7 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, return; ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua), - IOMMU_PAGE_SIZE(tbl), &hpa, &mem); + tbl->it_page_shift, &hpa, &mem); if (ret) pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n", __func__, be64_to_cpu(*pua), entry, ret); @@ -566,7 +566,7 @@ static long tce_iommu_build_v2(struct tce_container *container, __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); ret = tce_iommu_prereg_ua_to_hpa(container, - tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); + tce, tbl->it_page_shift, &hpa, &mem); if (ret) break; diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 66d31de60b9a..9d7166dfad1e 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -85,13 +85,13 @@ wait: return status; } -static void alarm_handler(int signum) +static void sig_handler(int signum) { - /* Jut wake us up from waitpid */ + /* Just wake us up from waitpid */ } -static struct sigaction alarm_action = { - .sa_handler = alarm_handler, +static struct sigaction sig_action = { + .sa_handler = sig_handler, }; void test_harness_set_timeout(uint64_t time) @@ -106,8 +106,14 @@ int test_harness(int (test_function)(void), char *name) test_start(name); test_set_git_version(GIT_VERSION); - if (sigaction(SIGALRM, &alarm_action, NULL)) { - perror("sigaction"); + if (sigaction(SIGINT, &sig_action, NULL)) { + perror("sigaction (sigint)"); + test_error(name); + return 1; + } + + if (sigaction(SIGALRM, &sig_action, NULL)) { + perror("sigaction (sigalrm)"); test_error(name); return 1; } |