From ba9c231f7499ff6918c069c72ff5fd836c76b963 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Sep 2006 10:52:31 +0200 Subject: [PATCH] i386: initialize end-of-memory variables as early as possible Move initialization of all memory end variables to as early as possible, so that dependent code doesn't need to check whether these variables have already been set. Change the range check in kunmap_atomic to actually make use of this so that the no-mapping-estabished path (under CONFIG_DEBUG_HIGHMEM) gets used only when the address is inside the lowmem area (and BUG() otherwise). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/setup.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d304..71a540362b78 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1170,6 +1170,14 @@ static unsigned long __init setup_memory(void) } printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif +#ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); -- cgit From 1a3f239ddf9208f2e52d36fef1c1c4518cbbbabe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:32 +0200 Subject: [PATCH] i386: Replace i386 open-coded cmdline parsing with This patch replaces the open-coded early commandline parsing throughout the i386 boot code with the generic mechanism (already used by ppc, powerpc, ia64 and s390). The code was inconsistent with whether it deletes the option from the cmdline or not, meaning some of these will get passed through the environment into init. This transformation is mainly mechanical, but there are some notable parts: 1) Grammar: s/linux never set's it up/linux never sets it up/ 2) Remove hacked-in earlyprintk= option scanning. When someone actually implements CONFIG_EARLY_PRINTK, then they can use early_param(). [AK: actually it is implemented, but I'm adding the early_param it in the next x86-64 patch] 3) Move declaration of generic_apic_probe() from setup.c into asm/apic.h 4) Various parameters now moved into their appropriate files (thanks Andi). 5) All parse functions which examine arg need to check for NULL, except one where it has subtle humor value. AK: readded acpi_sci handling which was completely dropped AK: moved some more variables into acpi/boot.c Cc: len.brown@intel.com Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 76 ++++++++- arch/i386/kernel/apic.c | 15 ++ arch/i386/kernel/io_apic.c | 24 ++- arch/i386/kernel/machine_kexec.c | 23 +++ arch/i386/kernel/setup.c | 350 +++++++++++++-------------------------- arch/i386/kernel/smpboot.c | 13 ++ arch/i386/mach-generic/probe.c | 58 ++++--- arch/i386/mm/init.c | 18 +- include/asm-i386/acpi.h | 14 -- include/asm-i386/apic.h | 4 +- include/asm-i386/io_apic.h | 11 ++ include/asm-i386/pgtable.h | 2 - 12 files changed, 319 insertions(+), 289 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index ee003bc0e8b1..87e2ab50b694 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -36,6 +36,8 @@ #include #include +int __initdata acpi_force = 0; + #ifdef CONFIG_X86_64 extern void __init clustered_apic_check(void); @@ -860,8 +862,6 @@ static void __init acpi_process_madt(void) return; } -extern int acpi_force; - #ifdef __i386__ static int __init disable_acpi_irq(struct dmi_system_id *d) @@ -1163,3 +1163,75 @@ int __init acpi_boot_init(void) return 0; } + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } + /* acpi=force to over-ride black-list */ + else if (strcmp(arg, "force") == 0) { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + /* acpi=strict disables out-of-spec workarounds */ + else if (strcmp(arg, "strict") == 0) { + acpi_strict = 1; + } + /* Limit ACPI just to boot-time to enable HT */ + else if (strcmp(arg, "ht") == 0) { + if (!acpi_force) + disable_acpi(); + acpi_ht = 1; + } + /* "acpi=noirq" disables ACPI interrupt routing */ + else if (strcmp(arg, "noirq") == 0) { + acpi_noirq_set(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* FIXME: Using pci= for an ACPI parameter is a travesty. */ +static int __init parse_pci(char *arg) +{ + if (arg && strcmp(arg, "noacpi") == 0) + acpi_disable_pci(); + return 0; +} +early_param("pci", parse_pci); + +#ifdef CONFIG_X86_IO_APIC +static int __init parse_acpi_skip_timer_override(char *arg) +{ + acpi_skip_timer_override = 1; + return 0; +} +early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); +#endif /* CONFIG_X86_IO_APIC */ + +static int __init setup_acpi_sci(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) + acpi_sci_flags.trigger = 1; + else if (!strcmp(s, "level")) + acpi_sci_flags.trigger = 3; + else if (!strcmp(s, "high")) + acpi_sci_flags.polarity = 1; + else if (!strcmp(s, "low")) + acpi_sci_flags.polarity = 3; + else + return -EINVAL; + return 0; +} +early_param("acpi_sci", setup_acpi_sci); diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 1a34fc57800b..ce75e71b694f 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1372,3 +1372,18 @@ int __init APIC_init_uniprocessor (void) return 0; } + +static int __init parse_lapic(char *arg) +{ + lapic_enable(); + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + lapic_disable(); + return 0; +} +early_param("nolapic", parse_nolapic); + diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index b713f27d7e86..fd0df75cfbda 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -66,7 +66,7 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; -int disable_timer_pin_1 __initdata; +static int disable_timer_pin_1 __initdata; /* * Rough estimation of how many shared IRQs there are, can @@ -2691,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a } #endif /* CONFIG_ACPI */ + +static int __init parse_disable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); + +static int __init parse_enable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = -1; + return 0; +} +early_param("enable_timer_pin_1", parse_enable_timer_pin_1); + +static int __init parse_noapic(char *arg) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 6b1ae6ba76f0..66c3dc99a655 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -209,3 +210,25 @@ NORET_TYPE void machine_kexec(struct kimage *image) rnk = (relocate_new_kernel_t) reboot_code_buffer; (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); } + +/* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never sets it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. + */ +static int __init parse_crashkernel(char *arg) +{ + unsigned long size, base; + size = memparse(arg, &arg); + if (*arg == '@') { + base = memparse(arg+1, &arg); + /* FIXME: Do I want a sanity check + * to validate the memory range? + */ + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + return 0; +} +early_param("crashkernel", parse_crashkernel); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 71a540362b78..c6e31ed386f5 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -96,11 +96,6 @@ unsigned long mmu_cr4_features; #endif EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI -int __initdata acpi_force = 0; -extern acpi_interrupt_flags acpi_sci_flags; -#endif - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; #ifdef CONFIG_MCA @@ -148,7 +143,6 @@ EXPORT_SYMBOL(ist_info); struct e820map e820; extern void early_cpu_init(void); -extern void generic_apic_probe(char *); extern int root_mountflags; unsigned long saved_videomode; @@ -700,238 +694,132 @@ static inline void copy_edd(void) } #endif -static void __init parse_cmdline_early (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - int userdef = 0; +static int __initdata user_defined_memmap = 0; - /* Save unparsed command line copy for /proc/cmdline */ - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; +/* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. + * + * HPA tells me bootloaders need to parse mem=, so no new + * option should be mem= [also see Documentation/i386/boot.txt] + */ +static int __init parse_mem(char *arg) +{ + if (!arg) + return -EINVAL; - for (;;) { - if (c != ' ') - goto next_char; - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to , overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * to +, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] + if (strcmp(arg, "nopentium") == 0) { + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. */ - if (!memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; + unsigned long long mem_size; - mem_size = memparse(from+4, &from); - limit_regions(mem_size); - userdef=1; - } - } - - else if (!memcmp(from, "memmap=", 7)) { - if (to != command_line) - to--; - if (!memcmp(from+7, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - from += 8+7; - e820.nr_map = 0; - userdef = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(from+7, &from); - if (*from == '@') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*from == '#') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*from == '$') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - userdef=1; - } - } - } - - else if (!memcmp(from, "noexec=", 7)) - noexec_setup(from + 7); + mem_size = memparse(arg, &arg); + limit_regions(mem_size); + user_defined_memmap = 1; + } + return 0; +} +early_param("mem", parse_mem); +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; -#ifdef CONFIG_X86_SMP - /* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. */ - else if (!memcmp(from, "maxcpus=", 8)) { - extern unsigned int maxcpus; - - maxcpus = simple_strtoul(from + 8, NULL, 0); - } + find_max_pfn(); + saved_max_pfn = max_pfn; #endif - -#ifdef CONFIG_ACPI - /* "acpi=off" disables both ACPI table parsing and interpreter */ - else if (!memcmp(from, "acpi=off", 8)) { - disable_acpi(); - } - - /* acpi=force to over-ride black-list */ - else if (!memcmp(from, "acpi=force", 10)) { - acpi_force = 1; - acpi_ht = 1; - acpi_disabled = 0; - } - - /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { - acpi_strict = 1; - } - - /* Limit ACPI just to boot-time to enable HT */ - else if (!memcmp(from, "acpi=ht", 7)) { - if (!acpi_force) - disable_acpi(); - acpi_ht = 1; - } - - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ - else if (!memcmp(from, "pci=noacpi", 10)) { - acpi_disable_pci(); - } - /* "acpi=noirq" disables ACPI interrupt routing */ - else if (!memcmp(from, "acpi=noirq", 10)) { - acpi_noirq_set(); + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; } + } + return 0; +} +early_param("memmap", parse_memmap); - else if (!memcmp(from, "acpi_sci=edge", 13)) - acpi_sci_flags.trigger = 1; - - else if (!memcmp(from, "acpi_sci=level", 14)) - acpi_sci_flags.trigger = 3; - - else if (!memcmp(from, "acpi_sci=high", 13)) - acpi_sci_flags.polarity = 1; - - else if (!memcmp(from, "acpi_sci=low", 12)) - acpi_sci_flags.polarity = 3; - -#ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) - acpi_skip_timer_override = 1; - - if (!memcmp(from, "disable_timer_pin_1", 19)) - disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) - disable_timer_pin_1 = -1; +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. + */ +static int __init parse_elfcorehdr(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable IO-APIC */ - else if (!memcmp(from, "noapic", 6)) - disable_ioapic_setup(); -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ + elfcorehdr_addr = memparse(arg, &arg); + return 0; +} +early_param("elfcorehdr", parse_elfcorehdr); +#endif /* CONFIG_PROC_VMCORE */ -#ifdef CONFIG_X86_LOCAL_APIC - /* enable local APIC */ - else if (!memcmp(from, "lapic", 5)) - lapic_enable(); +/* + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. + */ +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable local APIC */ - else if (!memcmp(from, "nolapic", 6)) - lapic_disable(); -#endif /* CONFIG_X86_LOCAL_APIC */ + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); -#ifdef CONFIG_KEXEC - /* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never set's it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ - else if (!memcmp(from, "crashkernel=", 12)) { - unsigned long size, base; - size = memparse(from+12, &from); - if (*from == '@') { - base = memparse(from+1, &from); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - } -#endif -#ifdef CONFIG_PROC_VMCORE - /* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. - */ - else if (!memcmp(from, "elfcorehdr=", 11)) - elfcorehdr_addr = memparse(from+11, &from); -#endif +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the + * vmalloc area - the default is 128m. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; - /* - * highmem=size forces highmem to be exactly 'size' bytes. - * This works even on boxes that have no highmem otherwise. - * This also works to reduce highmem size on bigger boxes. - */ - else if (!memcmp(from, "highmem=", 8)) - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; - - /* - * vmalloc=size forces the vmalloc area to be exactly 'size' - * bytes. This can be used to increase (or decrease) the - * vmalloc area - the default is 128m. - */ - else if (!memcmp(from, "vmalloc=", 8)) - __VMALLOC_RESERVE = memparse(from+8, &from); - - next_char: - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; - if (userdef) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + __VMALLOC_RESERVE = memparse(arg, &arg); + return 0; } +early_param("vmalloc", parse_vmalloc); /* * Callback for efi_memory_walk. @@ -1507,17 +1395,15 @@ void __init setup_arch(char **cmdline_p) data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; - parse_cmdline_early(cmdline_p); + parse_early_param(); -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } + if (user_defined_memmap) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + print_memory_map("user"); } -#endif + + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; max_low_pfn = setup_memory(); @@ -1546,7 +1432,7 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH - generic_apic_probe(*cmdline_p); + generic_apic_probe(); #endif if (efi_enabled) efi_map_memmap(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 9367af76ce37..517eb3874550 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1491,3 +1491,16 @@ void __init smp_intr_init(void) /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } + +/* + * If the BIOS enumerates physical processors before logical, + * maxcpus=N at enumeration-time can be used to disable HT. + */ +static int __init parse_maxcpus(char *arg) +{ + extern unsigned int maxcpus; + + maxcpus = simple_strtoul(arg, NULL, 0); + return 0; +} +early_param("maxcpus", parse_maxcpus); diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index 793d1b473251..94b1fd9cbe3c 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = { NULL, }; -static int cmdline_apic; +static int cmdline_apic __initdata; +static int __init parse_apic(char *arg) +{ + int i; + + if (!arg) + return -EINVAL; + + for (i = 0; apic_probe[i]; i++) { + if (!strcmp(apic_probe[i]->name, arg)) { + genapic = apic_probe[i]; + cmdline_apic = 1; + return 0; + } + } + return -ENOENT; +} +early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { @@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void) } } -void __init generic_apic_probe(char *command_line) +void __init generic_apic_probe(void) { - char *s; - int i; - int changed = 0; - - s = strstr(command_line, "apic="); - if (s && (s == command_line || isspace(s[-1]))) { - char *p = strchr(s, ' '), old; - if (!p) - p = strchr(s, '\0'); - old = *p; - *p = 0; - for (i = 0; !changed && apic_probe[i]; i++) { - if (!strcmp(apic_probe[i]->name, s+5)) { - changed = 1; + if (!cmdline_apic) { + int i; + for (i = 0; apic_probe[i]; i++) { + if (apic_probe[i]->probe()) { genapic = apic_probe[i]; + break; } } - if (!changed) - printk(KERN_ERR "Unknown genapic `%s' specified.\n", s); - *p = old; - cmdline_apic = changed; - } - for (i = 0; !changed && apic_probe[i]; i++) { - if (apic_probe[i]->probe()) { - changed = 1; - genapic = apic_probe[i]; - } + /* Not visible without early console */ + if (!apic_probe[i]) + panic("Didn't find an APIC driver"); } - /* Not visible without early console */ - if (!changed) - panic("Didn't find an APIC driver"); - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index d0c2fdf6a4d7..951386606d09 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; * on Enable * off Disable */ -void __init noexec_setup(const char *str) +static int __init noexec_setup(char *str) { - if (!strncmp(str, "on",2) && cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str,"off",3)) { + if (!str || !strcmp(str, "on")) { + if (cpu_has_nx) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } + } else if (!strcmp(str,"off")) { disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; - } + } else + return -EINVAL; + + return 0; } +early_param("noexec", noexec_setup); int nx_enabled = 0; #ifdef CONFIG_X86_PAE diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index 20f523954218..6016632d032f 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h @@ -131,21 +131,7 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC -extern int skip_ioapic_setup; extern int acpi_skip_timer_override; - -static inline void disable_ioapic_setup(void) -{ - skip_ioapic_setup = 1; -} - -static inline int ioapic_setup_disabled(void) -{ - return skip_ioapic_setup; -} - -#else -static inline void disable_ioapic_setup(void) { } #endif static inline void acpi_noirq_set(void) { acpi_noirq = 1; } diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index 2c1e371cebb6..8c5331ceca7b 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -42,6 +42,8 @@ static inline void lapic_enable(void) } while (0) +extern void generic_apic_probe(void); + #ifdef CONFIG_X86_LOCAL_APIC /* @@ -117,8 +119,6 @@ extern void enable_APIC_timer(void); extern void enable_NMI_through_LVT0 (void * dummy); -extern int disable_timer_pin_1; - void smp_send_timer_broadcast_ipi(struct pt_regs *regs); void switch_APIC_timer_to_ipi(void *cpumask); void switch_ipi_to_APIC_timer(void *cpumask); diff --git a/include/asm-i386/io_apic.h b/include/asm-i386/io_apic.h index 5092e819b8a2..5d309275a1dc 100644 --- a/include/asm-i386/io_apic.h +++ b/include/asm-i386/io_apic.h @@ -188,6 +188,16 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; +static inline void disable_ioapic_setup(void) +{ + skip_ioapic_setup = 1; +} + +static inline int ioapic_setup_disabled(void) +{ + return skip_ioapic_setup; +} + /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. @@ -206,6 +216,7 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 +static inline void disable_ioapic_setup(void) { } #endif extern int assign_irq_vector(int irq); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 09697fec3d2b..64140f2f1b95 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -391,8 +391,6 @@ extern pte_t *lookup_address(unsigned long address); static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} #endif -extern void noexec_setup(const char *str); - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) -- cgit From df3bb57d2c0160ccd1ee51322f50aa295c3b0858 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:33 +0200 Subject: [PATCH] i386/x86-64: Move acpi_disabled variables into acpi/boot.c Removes code duplication between i386/x86-64. Not needed anymore in setup.c since early_param cleanup Cc: len.brown@intel.com Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 7 +++++++ arch/i386/kernel/setup.c | 7 ------- arch/x86_64/kernel/setup.c | 3 --- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 87e2ab50b694..f3761b98c8dd 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -38,6 +38,13 @@ int __initdata acpi_force = 0; +#ifdef CONFIG_ACPI +int acpi_disabled = 0; +#else +int acpi_disabled = 1; +#endif +EXPORT_SYMBOL(acpi_disabled); + #ifdef CONFIG_X86_64 extern void __init clustered_apic_check(void); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index c6e31ed386f5..ea17567dbe72 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -89,13 +89,6 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -#ifdef CONFIG_ACPI - int acpi_disabled = 0; -#else - int acpi_disabled = 1; -#endif -EXPORT_SYMBOL(acpi_disabled); - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; #ifdef CONFIG_MCA diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 9c4d93c53c06..0f86976811d6 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -74,9 +74,6 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -int acpi_disabled; -EXPORT_SYMBOL(acpi_disabled); - int acpi_numa __initdata; /* Boot loader ID as an integer, for the benefit of proc_dointvec */ -- cgit From f157cbb1eb9ce3f33a401ec6d20eb3eb852351a3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] i386/x86-64: Make all early PCI scans dependent on CONFIG_PCI This is useful on systems with broken PCI bus. Affects various scans in x86-64 and i386's early ACPI quirk scan. Cc: gregkh@suse.de Cc: len.brown@intel.com Cc: Trammell Hudson Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/Makefile | 2 ++ arch/i386/kernel/setup.c | 2 ++ arch/x86_64/Kconfig | 3 ++- arch/x86_64/kernel/Makefile | 3 ++- arch/x86_64/kernel/setup.c | 2 ++ 5 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 7e9ac99354f4..7f7be01f44e6 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_ACPI) += boot.o +ifneq ($(CONFIG_PCI),) obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o +endif obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ea17567dbe72..7a99b1369fa2 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1437,9 +1437,11 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif +#ifdef CONFIG_PCI #ifdef CONFIG_X86_IO_APIC check_acpi_pci(); /* Checks more than just ACPI actually */ #endif +#endif #ifdef CONFIG_ACPI acpi_boot_init(); diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 1f4212605ef8..c2c68b902347 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -105,6 +105,7 @@ config X86_PC config X86_VSMP bool "Support for ScaleMP vSMP" + depends on PCI help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option @@ -291,7 +292,7 @@ config NUMA config K8_NUMA bool "Old style AMD Opteron NUMA detection" - depends on NUMA + depends on NUMA && PCI default y help Enable K8 NUMA node topology detection. You should say Y here if diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 000e67e8f028..2466fbd035ee 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - pci-dma.o pci-nommu.o alternative.o early-quirks.o + pci-dma.o pci-nommu.o alternative.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce.o @@ -39,6 +39,7 @@ obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_PCI) += early-quirks.o obj-y += topology.o obj-y += intel_cacheinfo.o diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 26524ce3b239..3d8309b1236e 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -493,7 +493,9 @@ void __init setup_arch(char **cmdline_p) paging_init(); +#ifdef CONFIG_PCI early_quirks(); +#endif /* * set this early, so we dont allocate cpu0 -- cgit From 91023300057e96de7f46e95166a3e02394ae72f9 Mon Sep 17 00:00:00 2001 From: keith mannthey Date: Mon, 25 Sep 2006 23:31:03 -0700 Subject: [PATCH] convert i386 NUMA KVA space to bootmem Address a long standing issue of booting with an initrd on an i386 numa system. Currently (and always) the numa kva area is mapped into low memory by finding the end of low memory and moving that mark down (thus creating space for the kva). The issue with this is that Grub loads initrds into this similar space so when the kernel check the initrd it finds it outside max_low_pfn and disables it (it thinks the initrd is not mapped into usable memory) thus initrd enabled kernels can't boot i386 numa :( My solution to the problem just converts the numa kva area to use the bootmem allocator to save it's area (instead of moving the end of low memory). Using bootmem allows the kva area to be mapped into more diverse addresses (not just the end of low memory) and enables the kva area to be mapped below the initrd if present. I have tested this patch on numaq(no initrd) and summit(initrd) i386 numa based systems. [akpm@osdl.org: cleanups] Signed-off-by: Keith Mannthey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 3 ++- arch/i386/mm/discontig.c | 29 +++++++++++++++++++++-------- include/asm-i386/mmzone.h | 6 ++++++ 3 files changed, 29 insertions(+), 9 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d304..27d4dc0d3ef1 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -1258,7 +1259,7 @@ void __init setup_bootmem_allocator(void) */ find_smp_config(); #endif - + numa_kva_reserve(); #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b8..2e36eff8aff9 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); void *node_remap_end_vaddr[MAX_NUMNODES]; void *node_remap_alloc_vaddr[MAX_NUMNODES]; - +static unsigned long kva_start_pfn; +static unsigned long kva_pages; /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -286,7 +287,6 @@ unsigned long __init setup_memory(void) { int nid; unsigned long system_start_pfn, system_max_low_pfn; - unsigned long reserve_pages; /* * When mapping a NUMA machine we allocate the node_mem_map arrays @@ -298,14 +298,23 @@ unsigned long __init setup_memory(void) find_max_pfn(); get_memcfg_numa(); - reserve_pages = calculate_numa_remap_pages(); + kva_pages = calculate_numa_remap_pages(); /* partially used pages are not usable - thus round upwards */ system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); - system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; - printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", - reserve_pages, max_low_pfn + reserve_pages); + kva_start_pfn = find_max_low_pfn() - kva_pages; + +#ifdef CONFIG_BLK_DEV_INITRD + /* Numa kva area is below the initrd */ + if (LOADER_TYPE && INITRD_START) + kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; +#endif + kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); + + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", + kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; @@ -323,7 +332,7 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn + node_remap_offset[nid]); + kva_start_pfn + node_remap_offset[nid]); /* Init the node remap allocator */ node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + (node_remap_size[nid] * PAGE_SIZE); @@ -338,7 +347,6 @@ unsigned long __init setup_memory(void) } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); - vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for_each_online_node(nid) find_max_pfn_node(nid); @@ -348,6 +356,11 @@ unsigned long __init setup_memory(void) return max_low_pfn; } +void __init numa_kva_reserve(void) +{ + reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); +} + void __init zone_sizes_init(void) { int nid; diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 22cb07cc8f32..61b073322006 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void) } extern int early_pfn_to_nid(unsigned long pfn); +extern void numa_kva_reserve(void); #else /* !CONFIG_NUMA */ + #define get_memcfg_numa get_memcfg_numa_flat #define get_zholes_size(n) (0) + +static inline void numa_kva_reserve(void) +{ +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM -- cgit From f06a96844a577c43249fce25809a4fae07407f46 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:10 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: fix MAX_NR_ZONES array initializations Fix array initialization in lots of arches The number of zones may now be reduced from 4 to 2 for many arches. Fix the array initialization for the zones array for all architectures so that it is not initializing a fixed number of elements. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/init.c | 2 +- arch/frv/mm/init.c | 2 +- arch/h8300/mm/init.c | 2 +- arch/i386/kernel/setup.c | 2 +- arch/i386/mm/discontig.c | 2 +- arch/m32r/mm/init.c | 2 +- arch/m68knommu/mm/init.c | 2 +- arch/mips/mm/init.c | 4 ++-- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/parisc/mm/init.c | 2 +- arch/sh64/mm/init.c | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 917dad1b74c8..550f4907d613 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -270,7 +270,7 @@ callback_init(void * kernel_end) void paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long dma_pfn, high_pfn; dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index b5b4286f9dd4..3f3a0ed3539b 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -98,7 +98,7 @@ void show_mem(void) */ void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; /* allocate some pages for kernel housekeeping tasks */ empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index d3d40bdc2d6a..e4f4199f97ab 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -138,7 +138,7 @@ void paging_init(void) #endif { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 27d4dc0d3ef1..060c68004bec 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1182,7 +1182,7 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; unsigned int max_dma, low; max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 2e36eff8aff9..07c300f93764 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -367,7 +367,7 @@ void __init zone_sizes_init(void) for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long *zholes_size; unsigned int max_dma; diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index b71348fec1f4..bbd97c85bc5d 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -100,7 +100,7 @@ void free_initrd_mem(unsigned long, unsigned long); #ifndef CONFIG_DISCONTIGMEM unsigned long __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long max_dma; unsigned long low; unsigned long start_pfn; diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c index e4c233eef195..06e538d1be3a 100644 --- a/arch/m68knommu/mm/init.c +++ b/arch/m68knommu/mm/init.c @@ -136,7 +136,7 @@ void paging_init(void) #endif { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index c52497bb102a..5b06349af2d5 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -163,10 +163,10 @@ static int __init page_is_ram(unsigned long pagenr) void __init paging_init(void) { - unsigned long zones_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; + unsigned long zones_size[] = { 0, }; unsigned long max_dma, high, low; #ifndef CONFIG_FLATMEM - unsigned long zholes_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; + unsigned long zholes_size[] = { 0, }; unsigned long i, j, pfn; #endif diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index efe6971fc800..59bfc0fc3f45 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -508,7 +508,7 @@ extern unsigned long setup_zero_pages(void); void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned node; pagetable_init(); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f2b96f1e0da7..c7329615ef94 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -809,7 +809,7 @@ void __init paging_init(void) flush_tlb_all_local(NULL); for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 }; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; /* We have an IOMMU, so all memory can go into a single ZONE_DMA zone. */ diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c index 1169757fb38b..83295bd21aa7 100644 --- a/arch/sh64/mm/init.c +++ b/arch/sh64/mm/init.c @@ -110,7 +110,7 @@ void show_mem(void) */ void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; pgd_init((unsigned long)swapper_pg_dir); pgd_init((unsigned long)swapper_pg_dir + -- cgit From 461a9afff5e731d6337c0f5b08a1e727ccd57e0a Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Mon, 25 Sep 2006 23:32:25 -0700 Subject: [PATCH] x86: add a bootparameter to reserve high linear address space Add a boot parameter to reserve high linear address space for hypervisors. This is necessary to allow dynamically loaded hypervisor modules, which might not happen until userspace is already running, and also provides a useful tool to benchmark the performance impact of reduced lowmem address space. Signed-off-by: Zachary Amsden Signed-off-by: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ arch/i386/kernel/setup.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch/i386/kernel/setup.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 71d05f481727..766abdab94e7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1363,6 +1363,11 @@ running once the system is up. reserve= [KNL,BUGS] Force the kernel to ignore some iomem area + reservetop= [IA-32] + Format: nn[KMG] + Reserves a hole at the top of the kernel virtual + address space. + resume= [SWSUSP] Specify the partition device for software suspend diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 060c68004bec..16d99444cf66 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -934,6 +934,24 @@ static void __init parse_cmdline_early (char ** cmdline_p) } } +/* + * reservetop=size reserves a hole at the top of the kernel address space which + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, + * so relocating the fixmap can be done before paging initialization. + */ +static int __init parse_reservetop(char *arg) +{ + unsigned long address; + + if (!arg) + return -EINVAL; + + address = memparse(arg, &arg); + reserve_top_address(address); + return 0; +} +early_param("reservetop", parse_reservetop); + /* * Callback for efi_memory_walk. */ -- cgit From 4cfee88ad30acc47f02b8b7ba3db8556262dce1e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 27 Sep 2006 01:49:51 -0700 Subject: [PATCH] Have x86 use add_active_range() and free_area_init_nodes Size zones and holes in an architecture independent manner for x86. [akpm@osdl.org: build fix] Signed-off-by: Mel Gorman Cc: Dave Hansen Cc: Andy Whitcroft Cc: Andi Kleen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Keith Mannthey" Cc: "Luck, Tony" Cc: KAMEZAWA Hiroyuki Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 8 ++-- arch/i386/kernel/setup.c | 26 ++++++------- arch/i386/kernel/srat.c | 97 +----------------------------------------------- arch/i386/mm/discontig.c | 69 +++++++++------------------------- 4 files changed, 34 insertions(+), 166 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 758044f5e718..3fd9f1e8b093 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -598,12 +598,10 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on ARCH_SPARSEMEM_ENABLE -source "mm/Kconfig" +config ARCH_POPULATES_NODE_MAP + def_bool y -config HAVE_ARCH_EARLY_PFN_TO_NID - bool - default y - depends on NUMA +source "mm/Kconfig" config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 76a524b4c90f..814cdebf7377 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1089,22 +1089,20 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - unsigned int max_dma, low; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = highend_pfn - low; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn}; + add_active_range(0, 0, highend_pfn); +#else + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn}; + add_active_range(0, 0, max_low_pfn); #endif - } - free_area_init(zones_size); + + free_area_init_nodes(max_zone_pfns); } #else extern unsigned long __init setup_memory(void); diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 83db411b3aa7..32413122c4c2 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -54,8 +54,6 @@ struct node_memory_chunk_s { static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; static int num_memory_chunks; /* total number of memory chunks */ -static int zholes_size_init; -static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES]; extern void * boot_ioremap(unsigned long, unsigned long); @@ -135,47 +133,6 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -/* Take a chunk of pages from page frame cstart to cend and count the number - * of pages in each zone, returned via zones[]. - */ -static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, - unsigned long *zones) -{ - unsigned long max_dma; - extern unsigned long max_low_pfn; - - int z; - unsigned long rend; - - /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide - * similarly scoped information and should be handled in a consistant - * manner. - */ - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - /* Split the hole into the zones in which it falls. Repeatedly - * take the segment in which the remaining hole starts, round it - * to the end of that zone. - */ - memset(zones, 0, MAX_NR_ZONES * sizeof(long)); - while (cstart < cend) { - if (cstart < max_dma) { - z = ZONE_DMA; - rend = (cend < max_dma)? cend : max_dma; - - } else if (cstart < max_low_pfn) { - z = ZONE_NORMAL; - rend = (cend < max_low_pfn)? cend : max_low_pfn; - - } else { - z = ZONE_HIGHMEM; - rend = cend; - } - zones[z] += rend - cstart; - cstart = rend; - } -} - /* * The SRAT table always lists ascending addresses, so can always * assume that the first "start" address that you see is the real @@ -220,7 +177,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); - memset(zholes_size, 0, sizeof(zholes_size)); num_memory_chunks = 0; while (p < end) { @@ -284,6 +240,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", j, chunk->nid, chunk->start_pfn, chunk->end_pfn); node_read_chunk(chunk->nid, chunk); + add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); } for_each_online_node(nid) { @@ -392,57 +349,7 @@ int __init get_memcfg_from_srat(void) return acpi20_parse_srat((struct acpi_table_srat *)header); } out_err: + remove_all_active_ranges(); printk("failed to get NUMA memory information from SRAT table\n"); return 0; } - -/* For each node run the memory list to determine whether there are - * any memory holes. For each hole determine which ZONE they fall - * into. - * - * NOTE#1: this requires knowledge of the zone boundries and so - * _cannot_ be performed before those are calculated in setup_memory. - * - * NOTE#2: we rely on the fact that the memory chunks are ordered by - * start pfn number during setup. - */ -static void __init get_zholes_init(void) -{ - int nid; - int c; - int first; - unsigned long end = 0; - - for_each_online_node(nid) { - first = 1; - for (c = 0; c < num_memory_chunks; c++){ - if (node_memory_chunk[c].nid == nid) { - if (first) { - end = node_memory_chunk[c].end_pfn; - first = 0; - - } else { - /* Record any gap between this chunk - * and the previous chunk on this node - * against the zones it spans. - */ - chunk_to_zones(end, - node_memory_chunk[c].start_pfn, - &zholes_size[nid * MAX_NR_ZONES]); - } - } - } - } -} - -unsigned long * __init get_zholes_size(int nid) -{ - if (!zholes_size_init) { - zholes_size_init++; - get_zholes_init(); - } - if (nid >= MAX_NUMNODES || !node_online(nid)) - printk("%s: nid = %d is invalid/offline. num_online_nodes = %d", - __FUNCTION__, nid, num_online_nodes()); - return &zholes_size[nid * MAX_NR_ZONES]; -} diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 941d1a5ebabb..51e3739dd227 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int nid) BUG(); } -/* Find the owning node for a pfn. */ -int early_pfn_to_nid(unsigned long pfn) -{ - int nid; - - for_each_node(nid) { - if (node_end_pfn[nid] == 0) - break; - if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) - return nid; - } - - return 0; -} - /* * Allocate memory for the pg_data_t for this node via a crude pre-bootmem * method. For node zero take this from the bottom of memory, for @@ -227,6 +212,8 @@ static unsigned long calculate_numa_remap_pages(void) unsigned long pfn; for_each_online_node(nid) { + unsigned old_end_pfn = node_end_pfn[nid]; + /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -276,6 +263,7 @@ static unsigned long calculate_numa_remap_pages(void) node_end_pfn[nid] -= size; node_remap_start_pfn[nid] = node_end_pfn[nid]; + shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); } printk("Reserving total of %ld pages for numa KVA remap\n", reserve_pages); @@ -369,45 +357,22 @@ void __init numa_kva_reserve(void) void __init zone_sizes_init(void) { int nid; - - - for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - unsigned long *zholes_size; - unsigned int max_dma; - - unsigned long low = max_low_pfn; - unsigned long start = node_start_pfn[nid]; - unsigned long high = node_end_pfn[nid]; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - if (node_has_online_mem(nid)){ - if (start > low) { -#ifdef CONFIG_HIGHMEM - BUG_ON(start > high); - zones_size[ZONE_HIGHMEM] = high - start; -#endif - } else { - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - BUG_ON(max_dma > low); - BUG_ON(low > high); - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; -#endif - } - } + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn + }; + + /* If SRAT has not registered memory, register it now */ + if (find_max_pfn_with_active_regions() == 0) { + for_each_online_node(nid) { + if (node_has_online_mem(nid)) + add_active_range(nid, node_start_pfn[nid], + node_end_pfn[nid]); } - - zholes_size = get_zholes_size(nid); - - free_area_init_node(nid, NODE_DATA(nid), zones_size, start, - zholes_size); } + + free_area_init_nodes(max_zone_pfns); return; } -- cgit