s390 updates for 6.11-rc5

- Fix KASLR base offset to account for symbol offsets in the vmlinux
   ELF file, preventing tool breakages like the drgn debugger
 
 - Fix potential memory corruption of physmem_info during kernel physical
   address randomization
 
 - Fix potential memory corruption due to overlap between the relocated
   lowcore and identity mapping by correctly reserving lowcore memory
 
 - Fix performance regression and avoid randomizing identity mapping base
   by default
 
 - Fix unnecessary delay of AP bus binding complete uevent to prevent
   startup lag in KVM guests using AP
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEE3QHqV+H2a8xAv27vjYWKoQLXFBgFAmbJDQYACgkQjYWKoQLX
 FBgTdwf9FNkHvLFhf5JbqlIERrjI9Ax8lQwCrAAJOidWwyKKs5hkFXUbf8JeMO1/
 r/eIWI/hqeeQhm/YXWsdrO1KOi2tS92eHTztelTZjKS7d2nLEkl5EELRtE6lVwWK
 6T/iENQNtBibRnK6zDRb3acb/MGkdQEDfNmvRwI02ZwIvGlv6bQnQEspKc69YJOo
 DiDHb+aqpsSjAY9QlRzM/Dxg3NUknEYOfxoDY6rG9cL1KnZxk+PDfy+z9gno44Tx
 vf+G55lBQ+vunQsV/9YHKYsytsj7kYCECp/W50W1ExrOBPhZRR9zM2S14BVCGuIW
 EdLVD8R1h0oRcgqlCIrKsnxAqatzIQ==
 =RsEC
 -----END PGP SIGNATURE-----

Merge tag 's390-6.11-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 fixes from Vasily Gorbik:

 - Fix KASLR base offset to account for symbol offsets in the vmlinux
   ELF file, preventing tool breakages like the drgn debugger

 - Fix potential memory corruption of physmem_info during kernel
   physical address randomization

 - Fix potential memory corruption due to overlap between the relocated
   lowcore and identity mapping by correctly reserving lowcore memory

 - Fix performance regression and avoid randomizing identity mapping
   base by default

 - Fix unnecessary delay of AP bus binding complete uevent to prevent
   startup lag in KVM guests using AP

* tag 's390-6.11-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/boot: Fix KASLR base offset off by __START_KERNEL bytes
  s390/boot: Avoid possible physmem_info segment corruption
  s390/ap: Refine AP bus bindings complete processing
  s390/mm: Pin identity mapping base to zero
  s390/mm: Prevent lowcore vs identity mapping overlap
This commit is contained in:
Linus Torvalds 2024-08-25 12:05:23 +12:00
commit 48fb4b3d9b
9 changed files with 91 additions and 34 deletions

View file

@ -604,6 +604,19 @@ config RANDOMIZE_BASE
as a security feature that deters exploit attempts relying on
knowledge of the location of kernel internals.
config RANDOMIZE_IDENTITY_BASE
bool "Randomize the address of the identity mapping base"
depends on RANDOMIZE_BASE
default DEBUG_VM
help
The identity mapping base address is pinned to zero by default.
Allow randomization of that base to expose code that otherwise
misses the distinction between physical and virtual addresses of
data structures.
That does not have any impact on the base address at which the
kernel image is loaded.
If unsure, say N
config KERNEL_IMAGE_BASE
hex "Kernel image base address"
range 0x100000 0x1FFFFFE0000000 if !KASAN

View file

@ -162,7 +162,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr)
error("64-bit relocation outside of kernel!\n");
*(u64 *)loc += offset - __START_KERNEL;
*(u64 *)loc += offset;
}
}
@ -177,7 +177,7 @@ static void kaslr_adjust_got(unsigned long offset)
*/
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
if (*entry)
*entry += offset - __START_KERNEL;
*entry += offset;
}
}
@ -252,7 +252,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
@ -341,7 +341,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
__identity_base = round_down(vmemmap_start - max_mappable, rte_size);
if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE))
__identity_base = round_down(vmemmap_start - max_mappable, rte_size);
return asce_limit;
}
@ -388,31 +389,25 @@ static void kaslr_adjust_vmlinux_info(long offset)
#endif
}
static void fixup_vmlinux_info(void)
{
vmlinux.entry -= __START_KERNEL;
kaslr_adjust_vmlinux_info(-__START_KERNEL);
}
void startup_kernel(void)
{
unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
unsigned long nokaslr_offset_phys, kaslr_large_page_offset;
unsigned long amode31_lma = 0;
unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0;
unsigned long kernel_size = TEXT_OFFSET + vmlinux_size;
unsigned long kaslr_large_page_offset;
unsigned long max_physmem_end;
unsigned long asce_limit;
unsigned long safe_addr;
psw_t psw;
fixup_vmlinux_info();
setup_lpp();
/*
* Non-randomized kernel physical start address must be _SEGMENT_SIZE
* aligned (see below).
*/
nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size);
/*
* Reserve decompressor memory together with decompression heap,
@ -456,16 +451,27 @@ void startup_kernel(void)
*/
kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
if (kaslr_enabled()) {
unsigned long end = ident_map_size - kaslr_large_page_offset;
unsigned long size = vmlinux_size + kaslr_large_page_offset;
__kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end);
text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size);
}
if (!__kaslr_offset_phys)
__kaslr_offset_phys = nokaslr_offset_phys;
__kaslr_offset_phys |= kaslr_large_page_offset;
if (!text_lma)
text_lma = nokaslr_text_lma;
text_lma |= kaslr_large_page_offset;
/*
* [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is
* never accessed via the kernel image mapping as per the linker script:
*
* . = TEXT_OFFSET;
*
* Therefore, this region could be used for something else and does
* not need to be reserved. See how it is skipped in setup_vmem().
*/
__kaslr_offset_phys = text_lma - TEXT_OFFSET;
kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
deploy_kernel((void *)__kaslr_offset_phys);
physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size);
deploy_kernel((void *)text_lma);
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
@ -488,7 +494,7 @@ void startup_kernel(void)
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
}
if (!amode31_lma)
amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
amode31_lma = text_lma - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
@ -504,8 +510,8 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem()
*/
clear_bss_section(__kaslr_offset_phys);
kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
clear_bss_section(text_lma);
kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size,
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);

View file

@ -90,7 +90,7 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
}
memgap_start = end;
}
kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
@ -475,7 +475,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
(unsigned long)__identity_va(end),
POPULATE_IDENTITY);
}
pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL);
/*
* [kernel_start..kernel_start + TEXT_OFFSET] region is never
* accessed as per the linker script:
*
* . = TEXT_OFFSET;
*
* Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
* [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
*/
pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);

View file

@ -109,7 +109,12 @@ SECTIONS
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = ALIGN(PAGE_SIZE);
. += AMODE31_SIZE; /* .amode31 section */
. = ALIGN(1 << 20); /* _SEGMENT_SIZE */
/*
* Make sure the location counter is not less than TEXT_OFFSET.
* _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead.
*/
. = MAX(TEXT_OFFSET, ALIGN(1 << 20));
#else
. = ALIGN(8);
#endif

View file

@ -279,8 +279,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define AMODE31_SIZE (3 * PAGE_SIZE)
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#define __START_KERNEL 0x100000
#define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE
#define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
#define TEXT_OFFSET 0x100000
#endif /* _S390_PAGE_H */

View file

@ -734,7 +734,23 @@ static void __init memblock_add_physmem_info(void)
}
/*
* Reserve memory used for lowcore/command line/kernel image.
* Reserve memory used for lowcore.
*/
/*
 * reserve_lowcore - reserve the part of the identity mapping that overlaps
 * the lowcore.
 *
 * The lowcore occupies [lowcore_start, lowcore_end). If the identity mapping
 * base lies below lowcore_end, the two ranges may overlap: clamp the lowcore
 * range to [__identity_base, __identity_base + ident_map_size) and reserve
 * the physical memory behind the clamped range.
 */
static void __init reserve_lowcore(void)
{
	void *lowcore_start = get_lowcore();
	void *lowcore_end = lowcore_start + sizeof(struct lowcore);
	void *start, *end;

	if ((void *)__identity_base < lowcore_end) {
		start = max(lowcore_start, (void *)__identity_base);
		end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
		/*
		 * memblock_reserve() takes a base and a size, not an end
		 * address; passing __pa(end) would over-reserve by __pa(start)
		 * bytes whenever the overlap does not begin at physical zero.
		 */
		memblock_reserve(__pa(start), __pa(end) - __pa(start));
	}
}
/*
* Reserve memory used for absolute lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
@ -918,6 +934,7 @@ void __init setup_arch(char **cmdline_p)
/* Do some memory reservations *before* memory is added to memblock */
reserve_pgtables();
reserve_lowcore();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();

View file

@ -39,7 +39,7 @@ PHDRS {
SECTIONS
{
. = __START_KERNEL;
. = TEXT_OFFSET;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */

View file

@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel)
case R_390_GOTOFF64:
break;
case R_390_64:
add_reloc(&relocs64, offset - ehdr.e_entry);
add_reloc(&relocs64, offset);
break;
default:
die("Unsupported relocation type: %d\n", r_type);

View file

@ -971,11 +971,16 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
char *name)
{
struct device_driver *drv = &ap_drv->driver;
int rc;
drv->bus = &ap_bus_type;
drv->owner = owner;
drv->name = name;
return driver_register(drv);
rc = driver_register(drv);
ap_check_bindings_complete();
return rc;
}
EXPORT_SYMBOL(ap_driver_register);